├── .gitignore ├── DDL.sql ├── LICENSE ├── MemoryVectorIndex ├── MemoryVectorIndex.cs └── MemoryVectorIndex.csproj ├── MempryVectorIndex.Tests ├── MemoryVectorIndexTests.cs └── MempryVectorIndex.Tests.csproj ├── README.md ├── VectorIndex.MainTest ├── Program.cs └── VectorIndex.MainTest.csproj ├── VectorIndex ├── FileRangeStore.cs ├── IRangeStore.cs ├── IndexBuilder.cs ├── MemoryRangeStore.cs ├── RangeValue.cs ├── Stats.cs └── VectorIndex.csproj └── vector-database.sln /.gitignore: -------------------------------------------------------------------------------- 1 | dotnet/.config 2 | 3 | ## Ignore Visual Studio temporary files, build results, and 4 | ## files generated by popular Visual Studio add-ons. 5 | ## 6 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 7 | 8 | # User-specific files 9 | *.rsuser 10 | *.suo 11 | *.user 12 | *.userosscache 13 | *.sln.docstates 14 | 15 | # User-specific files (MonoDevelop/Xamarin Studio) 16 | *.userprefs 17 | 18 | # Mono auto generated files 19 | mono_crash.* 20 | 21 | # Build results 22 | [Dd]ebug/ 23 | [Dd]ebugPublic/ 24 | [Rr]elease/ 25 | [Rr]eleases/ 26 | x64/ 27 | x86/ 28 | [Ww][Ii][Nn]32/ 29 | [Aa][Rr][Mm]/ 30 | [Aa][Rr][Mm]64/ 31 | bld/ 32 | [Bb]in/ 33 | [Oo]bj/ 34 | [Ll]og/ 35 | [Ll]ogs/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 | nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # ASP.NET Scaffolding 68 | ScaffoldingReadMe.txt 69 | 70 | # StyleCop 71 | StyleCopReport.xml 72 | 73 | # Files built by Visual Studio 74 | *_i.c 75 | *_p.c 76 | *_h.h 77 | *.ilk 78 | *.meta 79 | *.obj 80 | *.iobj 81 | *.pch 82 | *.pdb 83 | *.ipdb 84 | *.pgc 85 | *.pgd 86 | *.rsp 87 | *.sbr 88 | *.tlb 89 | *.tli 90 | *.tlh 91 | *.tmp 92 | *.tmp_proj 93 | *_wpftmp.csproj 94 | *.log 95 | *.tlog 96 | *.vspscc 97 | *.vssscc 98 | .builds 99 | *.pidb 100 | *.svclog 101 | *.scc 102 | 103 | # Chutzpah Test files 104 | _Chutzpah* 105 | 106 | # Visual C++ cache files 107 | ipch/ 108 | *.aps 109 | *.ncb 110 | *.opendb 111 | *.opensdf 112 | *.sdf 113 | *.cachefile 114 | *.VC.db 115 | *.VC.VC.opendb 116 | 117 | # Visual Studio profiler 118 | *.psess 119 | *.vsp 120 | *.vspx 121 | *.sap 122 | 123 | # Visual Studio Trace Files 124 | *.e2e 125 | 126 | # TFS 2012 Local Workspace 127 | $tf/ 128 | 129 | # Guidance Automation Toolkit 130 | *.gpState 131 | 132 | # ReSharper is a .NET coding add-in 133 | _ReSharper*/ 134 | *.[Rr]e[Ss]harper 135 | *.DotSettings.user 136 | 137 | # TeamCity is a build add-in 138 | _TeamCity* 139 | 140 | # DotCover is a Code Coverage Tool 141 | *.dotCover 142 | 143 | # AxoCover is a Code Coverage Tool 144 | .axoCover/* 145 | !.axoCover/settings.json 146 | 147 | # Coverlet is a free, cross platform Code Coverage Tool 148 | coverage*.json 149 | coverage*.xml 150 | coverage*.info 151 | 152 | # Visual Studio code coverage results 153 | *.coverage 154 | *.coveragexml 155 | 156 | # NCrunch 157 | _NCrunch_* 158 | .*crunch*.local.xml 159 | nCrunchTemp_* 160 | 161 | # 
MightyMoose 162 | *.mm.* 163 | AutoTest.Net/ 164 | 165 | # Web workbench (sass) 166 | .sass-cache/ 167 | 168 | # Installshield output folder 169 | [Ee]xpress/ 170 | 171 | # DocProject is a documentation generator add-in 172 | DocProject/buildhelp/ 173 | DocProject/Help/*.HxT 174 | DocProject/Help/*.HxC 175 | DocProject/Help/*.hhc 176 | DocProject/Help/*.hhk 177 | DocProject/Help/*.hhp 178 | DocProject/Help/Html2 179 | DocProject/Help/html 180 | 181 | # Click-Once directory 182 | publish/ 183 | 184 | # Publish Web Output 185 | *.[Pp]ublish.xml 186 | *.azurePubxml 187 | # Note: Comment the next line if you want to checkin your web deploy settings, 188 | # but database connection strings (with potential passwords) will be unencrypted 189 | *.pubxml 190 | *.publishproj 191 | 192 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 193 | # checkin your Azure Web App publish settings, but sensitive information contained 194 | # in these scripts will be unencrypted 195 | PublishScripts/ 196 | 197 | # NuGet Packages 198 | *.nupkg 199 | # NuGet Symbol Packages 200 | *.snupkg 201 | # The packages folder can be ignored because of Package Restore 202 | **/[Pp]ackages/* 203 | # except build/, which is used as an MSBuild target. 204 | !**/[Pp]ackages/build/ 205 | # Uncomment if necessary however generally it will be regenerated when needed 206 | #!**/[Pp]ackages/repositories.config 207 | # NuGet v3's project.json files produces more ignorable files 208 | *.nuget.props 209 | *.nuget.targets 210 | 211 | # Microsoft Azure Build Output 212 | csx/ 213 | *.build.csdef 214 | 215 | # Microsoft Azure Emulator 216 | ecf/ 217 | rcf/ 218 | 219 | # Windows Store app package directories and files 220 | AppPackages/ 221 | BundleArtifacts/ 222 | Package.StoreAssociation.xml 223 | _pkginfo.txt 224 | *.appx 225 | *.appxbundle 226 | *.appxupload 227 | 228 | # Visual Studio cache files 229 | # files ending in .cache can be ignored 230 | *.[Cc]ache 231 | # but keep track of directories ending in .cache 232 | !?*.[Cc]ache/ 233 | 234 | # Others 235 | ClientBin/ 236 | ~$* 237 | *~ 238 | *.dbmdl 239 | *.dbproj.schemaview 240 | *.jfm 241 | *.pfx 242 | *.publishsettings 243 | orleans.codegen.cs 244 | 245 | # Including strong name files can present a security risk 246 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 247 | #*.snk 248 | 249 | # Since there are multiple workflows, uncomment next line to ignore bower_components 250 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 251 | #bower_components/ 252 | 253 | # RIA/Silverlight projects 254 | Generated_Code/ 255 | 256 | # Backup & report files from converting an old project file 257 | # to a newer Visual Studio version. 
Backup files are not needed, 258 | # because we have git ;-) 259 | _UpgradeReport_Files/ 260 | Backup*/ 261 | UpgradeLog*.XML 262 | UpgradeLog*.htm 263 | ServiceFabricBackup/ 264 | *.rptproj.bak 265 | 266 | # SQL Server files 267 | *.mdf 268 | *.ldf 269 | *.ndf 270 | 271 | # Business Intelligence projects 272 | *.rdl.data 273 | *.bim.layout 274 | *.bim_*.settings 275 | *.rptproj.rsuser 276 | *- [Bb]ackup.rdl 277 | *- [Bb]ackup ([0-9]).rdl 278 | *- [Bb]ackup ([0-9][0-9]).rdl 279 | 280 | # Microsoft Fakes 281 | FakesAssemblies/ 282 | 283 | # GhostDoc plugin setting file 284 | *.GhostDoc.xml 285 | 286 | # Node.js Tools for Visual Studio 287 | .ntvs_analysis.dat 288 | node_modules/ 289 | 290 | # Visual Studio 6 build log 291 | *.plg 292 | 293 | # Visual Studio 6 workspace options file 294 | *.opt 295 | 296 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 297 | *.vbw 298 | 299 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 300 | *.vbp 301 | 302 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 303 | *.dsw 304 | *.dsp 305 | 306 | # Visual Studio 6 technical files 307 | *.ncb 308 | *.aps 309 | 310 | # Visual Studio LightSwitch build output 311 | **/*.HTMLClient/GeneratedArtifacts 312 | **/*.DesktopClient/GeneratedArtifacts 313 | **/*.DesktopClient/ModelManifest.xml 314 | **/*.Server/GeneratedArtifacts 315 | **/*.Server/ModelManifest.xml 316 | _Pvt_Extensions 317 | 318 | # Paket dependency manager 319 | .paket/paket.exe 320 | paket-files/ 321 | 322 | # FAKE - F# Make 323 | .fake/ 324 | 325 | # CodeRush personal settings 326 | .cr/personal 327 | 328 | # Python Tools for Visual Studio (PTVS) 329 | __pycache__/ 330 | *.pyc 331 | 332 | # Cake - Uncomment if you are using it 333 | # tools/** 334 | # !tools/packages.config 335 | 336 | # Tabs Studio 337 | *.tss 338 | 339 | # Telerik's JustMock configuration file 340 | *.jmconfig 341 | 342 | # BizTalk build output 343 | *.btp.cs 344 | *.btm.cs 345 | *.odx.cs 346 | *.xsd.cs 347 | 348 | # OpenCover UI analysis results 349 | OpenCover/ 350 | 351 | # Azure Stream Analytics local run output 352 | ASALocalRun/ 353 | 354 | # MSBuild Binary and Structured Log 355 | *.binlog 356 | 357 | # NVidia Nsight GPU debugger configuration file 358 | *.nvuser 359 | 360 | # MFractors (Xamarin productivity tool) working folder 361 | .mfractor/ 362 | 363 | # Local History for Visual Studio 364 | .localhistory/ 365 | 366 | # Visual Studio History (VSHistory) files 367 | .vshistory/ 368 | 369 | # BeatPulse healthcheck temp database 370 | healthchecksdb 371 | 372 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 373 | MigrationBackup/ 374 | 375 | # Ionide (cross platform F# VS Code tools) working folder 376 | .ionide/ 377 | 378 | # Fody - auto-generated XML schema 379 | FodyWeavers.xsd 380 | 381 | # VS Code files for those working on multiple tools 382 | .vscode/* 383 | !.vscode/settings.json 384 | !.vscode/tasks.json 385 | !.vscode/launch.json 386 | !.vscode/extensions.json 387 | *.code-workspace 388 | 389 | # Local History for Visual Studio Code 390 | .history/ 391 | 392 | # Windows Installer files from build outputs 393 | *.cab 394 | *.msi 395 | *.msix 396 | *.msm 397 | *.msp 398 | 399 | # JetBrains Rider 400 | *.sln.iml 401 | *.tmp 402 | *.log 403 | *.bck 404 | *.tgz 405 | *.tar 406 | *.zip 407 | *.cer 408 | *.crt 409 | *.key 410 | *.pem 411 | 412 | .env 413 | certs/ 414 | launchSettings.json 415 | 
config.development.yaml
416 | *.development.config
417 | *.development.json
418 | .DS_Store
419 | .idea/
420 | node_modules/
421 | obj/
422 | bin/
423 | _dev/
424 | .dev/
425 | *.devis.*
426 | .vs/
427 | *.user
428 | **/.vscode/chrome
429 | **/.vscode/.ropeproject/objectdb
430 | *.pyc
431 | .ipynb_checkpoints
432 | .jython_cache/
433 | __pycache__/
434 | .mypy_cache/
435 | __pypackages__/
436 | .pdm.toml
437 | global.json
438 | 
439 | # docfx
440 | **/DROP/
441 | **/TEMP/
442 | **/packages/
443 | **/bin/
444 | **/obj/
445 | _site
446 | 
447 | # Yarn
448 | .yarn
449 | .yarnrc.yml
450 | 
451 | # Python Environments
452 | .env
453 | .venv
454 | .myenv
455 | env/
456 | venv/
457 | myvenv/
458 | ENV/
459 | 
460 | # Python dist
461 | dist/
462 | 
463 | # Persistent storage
464 | data/qdrant
465 | data/chatstore*
466 | 
467 | # Java build
468 | java/**/target
469 | java/.mvn/wrapper/maven-wrapper.jar
470 | 
471 | # Java settings
472 | conf.properties
473 | /data
474 | 
-------------------------------------------------------------------------------- /DDL.sql: --------------------------------------------------------------------------------
1 | USE [Vectors]
2 | GO
3 | /****** Object: UserDefinedTableType [dbo].[PointType] Script Date: 28/06/2023 17:06:20 ******/
4 | CREATE TYPE [dbo].[PointType] AS TABLE(
5 | [ID] [bigint] NOT NULL,
6 | [Idx] [smallint] NOT NULL,
7 | [Value] [real] NULL,
8 | PRIMARY KEY CLUSTERED
9 | (
10 | [ID] ASC,
11 | [Idx] ASC
12 | )WITH (IGNORE_DUP_KEY = OFF)
13 | )
14 | GO
15 | /****** Object: UserDefinedTableType [dbo].[RangeType] Script Date: 28/06/2023 17:06:20 ******/
16 | CREATE TYPE [dbo].[RangeType] AS TABLE(
17 | [RangeID] [bigint] NOT NULL,
18 | [Dimension] [smallint] NULL,
19 | [Mid] [real] NULL,
20 | [LowRangeID] [bigint] NULL,
21 | [HighRangeID] [bigint] NULL,
22 | [ID] [bigint] NULL,
23 | PRIMARY KEY CLUSTERED
24 | (
25 | [RangeID] ASC
26 | )WITH (IGNORE_DUP_KEY = OFF)
27 | )
28 | GO
29 | /****** Object: UserDefinedFunction [dbo].[BuildIndex] Script Date: 28/06/2023 17:06:20 ******/
30 | SET ANSI_NULLS ON
31 | GO
32 | SET QUOTED_IDENTIFIER ON
33 | GO
34 | 
35 | USE [Vectors]
36 | GO
37 | /****** Object: UserDefinedFunction [dbo].[BuildIndex] Script Date: 04/07/2023 12:54:24 ******/
38 | SET ANSI_NULLS ON
39 | GO
40 | SET QUOTED_IDENTIFIER ON
41 | GO
42 | 
43 | -- Builds the range index for points.
44 | create function [dbo].[BuildIndex]
45 | (
46 | -- a points table to build the range index.
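-- For illustration, a hypothetical call (the ids and the 2-dimensional
-- values below are made up; PointType rows are (ID, Idx, Value) triples):
--
--   declare @p dbo.PointType;
--
--   insert into @p(ID, Idx, Value)
--   values (1, 0, 0.1), (1, 1, 0.9), (2, 0, -0.3), (2, 1, 0.2);
--
--   select * from dbo.BuildIndex(@p);
--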
47 | @points dbo.PointType readonly 48 | ) 49 | returns @index table 50 | ( 51 | RangeID bigint not null primary key, 52 | Dimension smallint null, 53 | Mid real null, 54 | LowRangeID bigint null, 55 | HighRangeID bigint null, 56 | ID bigint null 57 | ) 58 | as 59 | begin 60 | declare @ranges table 61 | ( 62 | ID bigint, 63 | RangeID bigint, 64 | primary key(RangeID, ID) 65 | ); 66 | 67 | declare @stats table 68 | ( 69 | RangeID bigint not null primary key, 70 | Idx smallint not null, 71 | Mean real not null, 72 | [Stdev] real, 73 | Count bigint not null, 74 | ID bigint not null 75 | ); 76 | 77 | --raiserror(N'Level 0.', 0, 0) with nowait; 78 | 79 | insert into @stats(RangeID, Idx, Mean, Stdev, Count, ID) 80 | select top 1 81 | 0, 82 | Idx, 83 | avg(Value), 84 | isnull(stdev(Value), 0) Stdev, 85 | count_big(*), 86 | avg(ID) 87 | from 88 | @points 89 | group by 90 | Idx 91 | order by 92 | Stdev desc 93 | 94 | declare @next bit = @@rowcount; 95 | 96 | if (@next != 0) 97 | begin 98 | insert @ranges(RangeID, ID) 99 | select 100 | iif(S.Stdev = 0, iif(P.ID <= S.ID, 1, 2), iif(Value < Mean, 1, 2)), 101 | P.ID 102 | from 103 | @points P 104 | join 105 | @stats S 106 | on 107 | P.Idx = S.Idx and 108 | S.Count > 1; 109 | 110 | set @next = @@rowcount; 111 | declare @level bigint = 0; 112 | declare @i tinyint = 0; 113 | 114 | while(@next != 0) 115 | begin 116 | --raiserror(N'Level %i.', 0, 0, @level) with nowait; 117 | 118 | insert into @stats(RangeID, Idx, Mean, Stdev, Count, ID) 119 | select 120 | S.RangeID * 2 + N.I, 121 | R.Idx, 122 | R.Mean, 123 | R.Stdev, 124 | R.Count, 125 | R.ID 126 | from 127 | @stats S 128 | join 129 | (select 1 union all select 2) N(I) 130 | on 131 | S.RangeID >= @level and 132 | S.Count > 1 133 | cross apply 134 | ( 135 | select top 1 136 | P.Idx, 137 | avg(P.Value) Mean, 138 | isnull(stdev(P.Value), 0) Stdev, 139 | count_big(*) Count, 140 | avg(P.ID) ID 141 | from 142 | @ranges R 143 | join 144 | @points P 145 | on 146 | P.ID = R.ID and 147 | R.RangeID = S.RangeID * 2 + N.I 148 | group by 149 | Idx 150 | order by 151 | iif(@level % 2 = 1, Stdev, -Stdev) desc 152 | ) R; 153 | 154 | set @level = @level * 2 + 1; 155 | set @i += 1; 156 | 157 | with R as 158 | ( 159 | select 160 | R.*, 161 | R.RangeID * 2 + 162 | case 163 | when Value < Mean then 1 164 | when Value > Mean then 2 165 | when R.ID <= S.ID then 1 166 | else 2 167 | end NewRangeID 168 | from 169 | @ranges R 170 | join 171 | @stats S 172 | on 173 | S.RangeID = R.RangeID and 174 | S.RangeID >= @level and 175 | S.Count > 1 176 | join 177 | @points P 178 | on 179 | P.ID = R.ID and 180 | P.Idx = S.Idx 181 | ) 182 | update R 183 | set 184 | RangeID = NewRangeID; 185 | 186 | set @next = @@rowcount; 187 | end; 188 | end; 189 | 190 | insert into @index(RangeID, Dimension, Mid, LowRangeID, HighRangeID, ID) 191 | select 192 | RangeID, 193 | iif(Stdev = 0, null, Idx) Dimension, 194 | iif(Stdev = 0, null, Mean) Mid, 195 | iif(Count = 1, null, RangeID * 2 + 1) LowRangeID, 196 | iif(Count = 1, null, RangeID * 2 + 2) HighRangeID, 197 | iif(Count = 1, ID, null) ID 198 | from 199 | @stats; 200 | 201 | return; 202 | end 203 | GO 204 | /****** Object: Table [dbo].[TextIndex] Script Date: 28/06/2023 17:06:20 ******/ 205 | SET ANSI_NULLS ON 206 | GO 207 | SET QUOTED_IDENTIFIER ON 208 | GO 209 | CREATE TABLE [dbo].[TextIndex]( 210 | [DocID] [bigint] NOT NULL, 211 | [RangeID] [bigint] NOT NULL, 212 | [Dimension] [smallint] NULL, 213 | [Mid] [real] NULL, 214 | [LowRangeID] [bigint] NULL, 215 | [HighRangeID] [bigint] NULL, 216 | [TextID] 
[bigint] NULL, 217 | CONSTRAINT [PK_TextIndex] PRIMARY KEY CLUSTERED 218 | ( 219 | [RangeID] ASC, 220 | [DocID] ASC 221 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY], 222 | CONSTRAINT [IX_TextIndex] UNIQUE NONCLUSTERED 223 | ( 224 | [DocID] ASC, 225 | [RangeID] ASC 226 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 227 | ) ON [PRIMARY] 228 | GO 229 | /****** Object: UserDefinedFunction [dbo].[Search] Script Date: 28/06/2023 17:06:20 ******/ 230 | SET ANSI_NULLS ON 231 | GO 232 | SET QUOTED_IDENTIFIER ON 233 | GO 234 | CREATE FUNCTION [dbo].[Search] 235 | ( 236 | -- json array of embedding vector 237 | @point nvarchar(max), 238 | -- a search domain. 239 | @domain real, 240 | -- Optional doc id. 241 | @docId bigint = null 242 | ) 243 | returns table 244 | as 245 | return 246 | with Vector as 247 | ( 248 | select 249 | [key] Idx, 250 | value - @domain MinValue, 251 | value + @domain MaxValue 252 | from 253 | openjson(@point) 254 | ), 255 | Node as 256 | ( 257 | select 258 | * 259 | from 260 | dbo.TextIndex 261 | where 262 | RangeID = 0 and 263 | (@docId is null or DocID = @docId) 264 | union all 265 | select 266 | I.* 267 | from 268 | dbo.TextIndex I 269 | inner join 270 | Node N 271 | on 272 | N.LowRangeID is not null and 273 | I.DocID = N.DocID and 274 | I.RangeID = N.LowRangeID and 275 | ( 276 | N.Dimension is null or 277 | N.Mid >= (select MinValue from Vector where Idx = N.Dimension) 278 | ) 279 | union all 280 | select 281 | I.* 282 | from 283 | dbo.TextIndex I 284 | inner join 285 | Node N 286 | on 287 | N.HighRangeID is not null and 288 | I.DocID = N.DocID and 289 | I.RangeID = N.HighRangeID and 290 | ( 291 | N.Dimension is null or 292 | N.Mid <= (select MaxValue from Vector where Idx = N.Dimension) 293 | ) 294 | ) 295 | select DocID, TextID from Node where TextID is not null; 296 | GO 297 | /****** Object: Table [dbo].[Document] Script Date: 28/06/2023 17:06:20 ******/ 298 | SET ANSI_NULLS ON 299 | GO 300 | SET QUOTED_IDENTIFIER ON 301 | GO 302 | CREATE TABLE [dbo].[Document]( 303 | [DocID] [bigint] NOT NULL, 304 | [Name] [nvarchar](256) NULL, 305 | CONSTRAINT [PK_Documents] PRIMARY KEY CLUSTERED 306 | ( 307 | [DocID] ASC 308 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 309 | ) ON [PRIMARY] 310 | GO 311 | /****** Object: Table [dbo].[Text] Script Date: 28/06/2023 17:06:20 ******/ 312 | SET ANSI_NULLS ON 313 | GO 314 | SET QUOTED_IDENTIFIER ON 315 | GO 316 | CREATE TABLE [dbo].[Text]( 317 | [DocID] [bigint] NOT NULL, 318 | [TextID] [bigint] NOT NULL, 319 | [Text] [nvarchar](max) NULL, 320 | [Vector] [nvarchar](max) NULL, 321 | CONSTRAINT [PK_Text] PRIMARY KEY CLUSTERED 322 | ( 323 | [DocID] ASC, 324 | [TextID] ASC 325 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 326 | ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] 327 | GO 328 | ALTER TABLE [dbo].[Document] ADD CONSTRAINT [DF_Document_DocID] DEFAULT (NEXT VALUE FOR [dbo].[DocumentID]) FOR [DocID] 329 | GO 330 | ALTER TABLE [dbo].[Text] ADD CONSTRAINT [DF_Text_TextID_1] DEFAULT (NEXT VALUE FOR [dbo].[TextID]) FOR [TextID] 331 | GO 332 | ALTER TABLE 
[dbo].[Text] WITH CHECK ADD CONSTRAINT [FK_Text_Document] FOREIGN KEY([DocID]) 333 | REFERENCES [dbo].[Document] ([DocID]) 334 | ON UPDATE CASCADE 335 | ON DELETE CASCADE 336 | GO 337 | ALTER TABLE [dbo].[Text] CHECK CONSTRAINT [FK_Text_Document] 338 | GO 339 | ALTER TABLE [dbo].[TextIndex] WITH CHECK ADD CONSTRAINT [FK_TextIndex_Document] FOREIGN KEY([DocID]) 340 | REFERENCES [dbo].[Document] ([DocID]) 341 | ON UPDATE CASCADE 342 | ON DELETE CASCADE 343 | GO 344 | ALTER TABLE [dbo].[TextIndex] CHECK CONSTRAINT [FK_TextIndex_Document] 345 | GO 346 | /****** Object: StoredProcedure [dbo].[IndexDocument] Script Date: 28/06/2023 17:06:20 ******/ 347 | SET ANSI_NULLS ON 348 | GO 349 | SET QUOTED_IDENTIFIER ON 350 | GO 351 | 352 | CREATE procedure [dbo].[IndexDocument] 353 | @docID bigint 354 | as 355 | begin 356 | set nocount on; 357 | 358 | declare @points dbo.PointType; 359 | declare @index dbo.RangeType; 360 | 361 | --raiserror(N'Start loading points.', 0, 0, @timespan) with nowait; 362 | 363 | --set @start = current_timestamp; 364 | 365 | insert into @points(ID, Idx, Value) 366 | select 367 | TextID, [key], value 368 | from 369 | dbo.Text 370 | cross apply 371 | openjson(Vector) 372 | where 373 | DocID = @docID; 374 | 375 | --set @end = current_timestamp; 376 | --set @timespan = datediff(ms, @start, @end); 377 | 378 | --raiserror(N'Points loaded in %i milliseconds.', 0, 0, @timespan) with nowait; 379 | 380 | --raiserror(N'Start building index.', 0, 0, @timespan) with nowait; 381 | 382 | --set @start = current_timestamp; 383 | 384 | --raiserror(N'Start building index.', 0, 0, @timespan) with nowait; 385 | 386 | --set @start = current_timestamp; 387 | 388 | insert into @index 389 | select * from dbo.BuildIndex(@points); 390 | 391 | --set @end = current_timestamp; 392 | --set @timespan = datediff(ms, @start, @end); 393 | 394 | --raiserror(N'Index built in %i milliseconds.', 0, 0, @timespan) with nowait; 395 | 396 | -- Update index. 397 | delete from dbo.TextIndex where DocID = @docID; 398 | 399 | insert into dbo.TextIndex 400 | ( 401 | DocID, 402 | RangeID, 403 | Dimension, 404 | Mid, 405 | LowRangeID, 406 | HighRangeID, 407 | TextID 408 | ) 409 | select 410 | @docID, 411 | RangeID, 412 | Dimension, 413 | Mid, 414 | LowRangeID, 415 | HighRangeID, 416 | ID 417 | from 418 | @index 419 | end 420 | GO 421 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Nesterovsky Bros 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /MemoryVectorIndex/MemoryVectorIndex.cs: --------------------------------------------------------------------------------
1 | using System.Collections;
2 | using System.Numerics;
3 | 
4 | namespace NesterovskyBros.VectorIndex;
5 | 
6 | /// <summary>
7 | /// A record index by normalized vectors, such
8 | /// that each of their components lies in the range [-1, 1].
9 | /// All vectors must be of the same size.
10 | /// </summary>
11 | /// <typeparam name="R">A record type associated with a vector.</typeparam>
12 | public class MemoryVectorIndex<R>: IEnumerable<R>
13 | {
14 | /// <summary>
15 | /// Creates a vector index.
16 | /// </summary>
17 | /// <param name="vectorSelector">
18 | /// A function returning a vector for the record.
19 | /// </param>
20 | /// <param name="listThreshold">
21 | /// A threshold size to store records in list buckets.
22 | /// </param>
23 | public MemoryVectorIndex(
24 | Func<R, ReadOnlyMemory<float>> vectorSelector,
25 | int listThreshold = 10)
26 | {
27 | if (listThreshold <= 0)
28 | {
29 | throw new ArgumentException(
30 | "List threshold must be greater than zero.",
31 | nameof(listThreshold));
32 | }
33 | 
34 | this.vectorSelector = vectorSelector;
35 | this.listThreshold = listThreshold;
36 | }
37 | 
38 | /// <summary>
39 | /// Creates a vector index.
40 | /// </summary>
41 | /// <param name="records">Records to add to the index.</param>
42 | /// <param name="vectorSelector">
43 | /// A function returning a vector for the record.
44 | /// </param>
45 | /// <param name="listThreshold">
46 | /// A threshold size to store records in list buckets.
47 | /// </param>
48 | public MemoryVectorIndex(
49 | IEnumerable<R> records,
50 | Func<R, ReadOnlyMemory<float>> vectorSelector,
51 | int listThreshold = 10):
52 | this(vectorSelector, listThreshold)
53 | {
54 | foreach(var record in records)
55 | {
56 | Add(record);
57 | }
58 | }
59 | 
60 | /// <summary>
61 | /// Number of records.
62 | /// </summary>
63 | public int Count { get; private set; }
64 | 
65 | /// <inheritdoc/>
66 | public IEnumerator<R> GetEnumerator() =>
67 | records.Values.SelectMany(items => items).GetEnumerator();
68 | 
69 | /// <inheritdoc/>
70 | IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
71 | 
72 | /// <summary>
73 | /// Clears the index.
74 | /// </summary>
75 | public void Clear()
76 | {
77 | Count = 0;
78 | records.Clear();
79 | entries.Clear();
80 | }
81 | 
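// Structure note: the index is an implicit binary trie over the cube
// [-1, 1]^vectorSize. entries[k] holds the (low, high) child slots of
// node k (-1 when absent), and a leaf node k keeps its records in
// records[k]. Each split halves one coordinate interval around a running
// center, and the split step halves after a full pass over all
// coordinates.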
82 | /// <summary>
83 | /// Adds a record to the index.
84 | /// </summary>
85 | /// <param name="record">A record to add.</param>
86 | public void Add(R record)
87 | {
88 | var vector = vectorSelector(record).Span;
89 | 
90 | if (entries is [])
91 | {
92 | if (vector.Length == 0)
93 | {
94 | throw new ArgumentException("Invalid vector size.", nameof(record));
95 | }
96 | 
97 | vectorSize = vector.Length;
98 | Count = 1;
99 | records[0] = [record];
100 | entries.Add((-1, -1));
101 | 
102 | return;
103 | }
104 | 
105 | if (vector.Length != vectorSize)
106 | {
107 | throw new ArgumentException("Invalid vector size.", nameof(record));
108 | }
109 | 
110 | var index = 0;
111 | var step = 1f;
112 | var centers = new float[vector.Length];
113 | 
114 | for(var depth = 0; depth < maxDepth; ++depth)
115 | {
116 | step /= 2;
117 | 
118 | for(var i = 0; i < vector.Length; ++i)
119 | {
120 | var (low, high) = entries[index];
121 | 
122 | if (vector[i] < centers[i])
123 | {
124 | if (low >= 0)
125 | {
126 | centers[i] -= step;
127 | index = low;
128 | 
129 | continue;
130 | }
131 | 
132 | if (high >= 0)
133 | {
134 | entries[index] = (entries.Count, high);
135 | records[entries.Count] = [record];
136 | entries.Add((-1, -1));
137 | ++Count;
138 | 
139 | return;
140 | }
141 | }
142 | else
143 | {
144 | if (high >= 0)
145 | {
146 | centers[i] += step;
147 | index = high;
148 | 
149 | continue;
150 | }
151 | 
152 | if (low >= 0)
153 | {
154 | entries[index] = (low, entries.Count);
155 | records[entries.Count] = [record];
156 | entries.Add((-1, -1));
157 | ++Count;
158 | 
159 | return;
160 | }
161 | }
162 | 
163 | // This is a leaf.
164 | var list = records[index];
165 | 
166 | list.Add(record);
167 | ++Count;
168 | 
169 | if (list.Count <= listThreshold || depth >= maxDepth - 1)
170 | {
171 | return;
172 | }
173 | 
174 | records.Remove(index);
175 | 
176 | // Split the list.
177 | List<R> lowList = [];
178 | 
179 | for(; depth < maxDepth; ++depth)
180 | {
181 | for(; i < vector.Length; ++i)
182 | {
183 | for(var j = list.Count; j-- > 0;)
184 | {
185 | var item = list[j];
186 | 
187 | if (vectorSelector(item).Span[i] < centers[i])
188 | {
189 | lowList.Add(item);
190 | list.RemoveAt(j);
191 | }
192 | }
193 | 
194 | if (lowList is [])
195 | {
196 | centers[i] += step;
197 | entries[index] = (-1, entries.Count);
198 | index = entries.Count;
199 | entries.Add((-1, -1));
200 | }
201 | else if (list is [])
202 | {
203 | centers[i] -= step;
204 | (lowList, list) = (list, lowList);
205 | entries[index] = (entries.Count, -1);
206 | index = entries.Count;
207 | entries.Add((-1, -1));
208 | }
209 | else
210 | {
211 | entries[index] = (entries.Count, entries.Count + 1);
212 | records[entries.Count] = lowList;
213 | records[entries.Count + 1] = list;
214 | entries.Add((-1, -1));
215 | entries.Add((-1, -1));
216 | 
217 | return;
218 | }
219 | }
220 | }
221 | 
222 | // Bad distribution, probably not normalized.
223 | records[index] = list;
224 | 
225 | return;
226 | }
227 | }
228 | }
229 | 
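// Search note: the traversal below is depth-first with pruning. Each
// stack item carries a remaining squared-distance budget ("length");
// entering the far side of a split replaces this coordinate's previous
// contribution, max(|delta| - step, 0)^2, with delta^2, and a branch
// whose budget turns negative is skipped without visiting its records.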
230 | /// <summary>
231 | /// Finds records in the index.
232 | /// </summary>
233 | /// <param name="vector">A vector for the neighborhood origin.</param>
234 | /// <param name="distance">A Euclidean distance for the match.</param>
235 | /// <param name="predicate">A filter predicate.</param>
236 | /// <returns>An enumeration of matched records.</returns>
237 | /// <remarks>
238 | /// Index searches records and discards those that are too far, yet
239 | /// predicate may receive records that are still far enough for the match,
240 | /// so predicate should verify the match.
241 | /// </remarks>
242 | public IEnumerable<R> Find(
243 | ReadOnlyMemory<float> vector,
244 | float distance,
245 | Func<R, ReadOnlyMemory<float>, bool> predicate)
246 | {
247 | if (entries is [])
248 | {
249 | yield break;
250 | }
251 | 
252 | if (vector.Length != vectorSize)
253 | {
254 | throw new ArgumentException("Invalid vector size.", nameof(vector));
255 | }
256 | 
257 | var index = 0;
258 | var centers = new float[vector.Length];
259 | Stack<(int index, int i, float center, float step, float length)> state =
260 | [];
261 | 
262 | state.Push((0, 0, 0, 1, distance * distance));
263 | 
264 | while(state.TryPeek(out var item))
265 | {
266 | (var prev, index) = (index, item.index);
267 | var (i, center, step) = (item.i, item.center, item.step);
268 | var (low, high) = entries[index];
269 | 
270 | centers[i] = center;
271 | 
272 | if (prev == high)
273 | {
274 | state.Pop();
275 | 
276 | continue;
277 | }
278 | 
279 | var delta = vector.Span[i] - center;
280 | var prevDelta = Math.Max(Math.Abs(delta) - step, 0);
281 | 
282 | if (prev != low && low != -1)
283 | {
284 | var length = delta <= 0 ? item.length :
285 | item.length + (prevDelta - delta) * (prevDelta + delta);
286 | 
287 | if (length >= 0)
288 | {
289 | var half = step / 2;
290 | 
291 | centers[i] -= half;
292 | 
293 | if (++i == vectorSize)
294 | {
295 | i = 0;
296 | step = half;
297 | }
298 | 
299 | state.Push((low, i, centers[i], step, length));
300 | 
301 | continue;
302 | }
303 | }
304 | 
305 | if (high != -1)
306 | {
307 | var length = delta >= 0 ? item.length :
308 | item.length + (prevDelta - delta) * (prevDelta + delta);
309 | 
310 | if (length >= 0)
311 | {
312 | var half = step / 2;
313 | 
314 | centers[i] += half;
315 | 
316 | if (++i == vectorSize)
317 | {
318 | i = 0;
319 | step = half;
320 | }
321 | 
322 | state.Push((high, i, centers[i], step, length));
323 | }
324 | else
325 | {
326 | state.Pop();
327 | }
328 | 
329 | continue;
330 | }
331 | 
332 | state.Pop();
333 | 
334 | if (low == -1)
335 | {
336 | foreach(var record in records[index])
337 | {
338 | if (predicate(record, vector))
339 | {
340 | yield return record;
341 | }
342 | }
343 | }
344 | }
345 | }
346 | 
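// Remove below repeats Find's traversal and pruning; additionally, when
// a bucket becomes empty, it unlinks the leaf and collapses the chain of
// single-child ancestors (without consolidating the remaining lists).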
347 | /// <summary>
348 | /// Removes records from the index.
349 | /// </summary>
350 | /// <param name="vector">A vector for the neighborhood origin.</param>
351 | /// <param name="distance">A Euclidean distance for the match.</param>
352 | /// <param name="predicate">A filter predicate.</param>
353 | /// <remarks>
354 | /// Index searches records and discards those that are too far, yet
355 | /// predicate may receive records that are still far enough for the match,
356 | /// so predicate should verify the match.
357 | /// </remarks>
358 | public void Remove(
359 | ReadOnlyMemory<float> vector,
360 | float distance,
361 | Func<R, ReadOnlyMemory<float>, bool> predicate)
362 | {
363 | if (entries is [])
364 | {
365 | return;
366 | }
367 | 
368 | if (vector.Length != vectorSize)
369 | {
370 | throw new ArgumentException("Invalid vector size.", nameof(vector));
371 | }
372 | 
373 | var vectorSpan = vector.Span;
374 | var index = 0;
375 | var centers = new float[vector.Length];
376 | Stack<(int index, int i, float center, float step, float length)> state =
377 | [];
378 | 
379 | state.Push((0, 0, 0, 1, distance * distance));
380 | 
381 | while(state.TryPeek(out var item))
382 | {
383 | (var prev, index) = (index, item.index);
384 | var (i, center, step) = (item.i, item.center, item.step);
385 | var (low, high) = entries[index];
386 | 
387 | centers[i] = center;
388 | 
389 | if (prev == high)
390 | {
391 | state.Pop();
392 | 
393 | continue;
394 | }
395 | 
396 | var delta = vectorSpan[i] - center;
397 | var prevDelta = Math.Max(Math.Abs(delta) - step, 0);
398 | 
399 | if (prev != low && low != -1)
400 | {
401 | var length = delta <= 0 ? item.length :
402 | item.length + (prevDelta - delta) * (prevDelta + delta);
403 | 
404 | if (length >= 0)
405 | {
406 | var half = step / 2;
407 | 
408 | centers[i] -= half;
409 | 
410 | if (++i == vectorSize)
411 | {
412 | i = 0;
413 | step = half;
414 | }
415 | 
416 | state.Push((low, i, centers[i], step, length));
417 | 
418 | continue;
419 | }
420 | }
421 | 
422 | if (high != -1)
423 | {
424 | var length = delta >= 0 ? item.length :
425 | item.length + (prevDelta - delta) * (prevDelta + delta);
426 | 
427 | if (length >= 0)
428 | {
429 | var half = step / 2;
430 | 
431 | centers[i] += half;
432 | 
433 | if (++i == vectorSize)
434 | {
435 | i = 0;
436 | step = half;
437 | }
438 | 
439 | state.Push((high, i, centers[i], step, length));
440 | }
441 | else
442 | {
443 | state.Pop();
444 | }
445 | 
446 | continue;
447 | }
448 | 
449 | state.Pop();
450 | 
451 | if (low == -1)
452 | {
453 | var list = records[index];
454 | 
455 | for(i = list.Count; i-- > 0;)
456 | {
457 | if (predicate(list[i], vector))
458 | {
459 | list.RemoveAt(i);
460 | }
461 | }
462 | 
463 | if (list is [])
464 | {
465 | records.Remove(index);
466 | 
467 | // NOTE: we do not consolidate lists here.
468 | while(state.TryPeek(out item))
469 | {
470 | (low, high) = entries[item.index];
471 | 
472 | if (low == -1 || high == -1)
473 | {
474 | centers[i] = center;
475 | index = item.index;
476 | entries[item.index] = (-1, -1);
477 | state.Pop();
478 | 
479 | continue;
480 | }
481 | 
482 | entries[item.index] = low == index ? (-1, high) : (low, -1);
483 | 
484 | break;
485 | }
486 | }
487 | }
488 | }
489 | }
490 | 
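/// <summary>
/// Gets a diagnostic view of the index: an enumeration of trie nodes as
/// (index, parent, center, records) tuples, where records is null for
/// internal nodes.
/// </summary>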
491 | public IEnumerable<
492 | (
493 | int index,
494 | int parent,
495 | ReadOnlyMemory<float> center,
496 | IReadOnlyList<R>? records
497 | )> IndexHierarchy
498 | {
499 | get
500 | {
501 | if (entries is [])
502 | {
503 | yield break;
504 | }
505 | 
506 | var index = 0;
507 | var centers = new float[vectorSize];
508 | Stack<(int index, int parent, int i, float center, float step)> state = [];
509 | 
510 | state.Push((0, -1, 0, 0, 1));
511 | 
512 | while(state.TryPeek(out var item))
513 | {
514 | var (i, step) = (item.i, item.step);
515 | (var prev, index) = (index, item.index);
516 | var (low, high) = entries[index];
517 | 
518 | centers[i] = item.center;
519 | 
520 | if (prev == high)
521 | {
522 | state.Pop();
523 | 
524 | continue;
525 | }
526 | 
527 | if (prev != low && low != -1)
528 | {
529 | var half = step / 2;
530 | 
531 | centers[i] -= half;
532 | 
533 | if (++i == vectorSize)
534 | {
535 | i = 0;
536 | step = half;
537 | }
538 | 
539 | state.Push((low, index, i, centers[i], step));
540 | 
541 | yield return (low, index, centers, null);
542 | 
543 | continue;
544 | }
545 | 
546 | if (high != -1)
547 | {
548 | var half = step / 2;
549 | 
550 | centers[i] += half;
551 | 
552 | if (++i == vectorSize)
553 | {
554 | i = 0;
555 | step = half;
556 | }
557 | 
558 | state.Push((high, index, i, centers[i], step));
559 | 
560 | yield return (high, index, centers, null);
561 | 
562 | continue;
563 | }
564 | 
565 | state.Pop();
566 | 
567 | if (low == -1)
568 | {
569 | yield return (index, item.parent, centers, records[index]);
570 | }
571 | }
572 | }
573 | }
574 | 
575 | /// <summary>
576 | /// A function returning a vector for a record.
577 | /// </summary>
578 | private readonly Func<R, ReadOnlyMemory<float>> vectorSelector;
579 | 
580 | /// <summary>
581 | /// A threshold size to store records in list buckets.
582 | /// </summary>
583 | private readonly int listThreshold;
584 | 
585 | /// <summary>
586 | /// A size of the vector.
587 | /// </summary>
588 | private int vectorSize;
589 | 
590 | /// <summary>
591 | /// List of buckets.
592 | /// </summary>
593 | private readonly List<(int low, int high)> entries = [];
594 | 
595 | /// <summary>
596 | /// Record lists by entries.
597 | /// </summary>
598 | private readonly Dictionary<int, List<R>> records = [];
599 | 
600 | 
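// maxDepth below equals the significand width of float (24 bits): after
// that many step halvings the split step falls below the precision of
// the vector components, so deeper splits could not separate points.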
601 | /// <summary>
602 | /// Max depth of vectors before going to list.
603 | /// </summary>
604 | private static readonly int maxDepth = ((IFloatingPoint<float>)0f).GetSignificandBitLength();
605 | }
606 | 
-------------------------------------------------------------------------------- /MemoryVectorIndex/MemoryVectorIndex.csproj: --------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 | 
3 | <PropertyGroup>
4 | <TargetFramework>net8.0</TargetFramework>
5 | <ImplicitUsings>enable</ImplicitUsings>
6 | <Nullable>enable</Nullable>
7 | </PropertyGroup>
8 | 
9 | 
10 | 
11 | 
12 | 
13 | 
14 | </Project>
-------------------------------------------------------------------------------- /MempryVectorIndex.Tests/MemoryVectorIndexTests.cs: --------------------------------------------------------------------------------
1 | using ArffTools;
2 | 
3 | namespace NesterovskyBros.VectorIndex;
4 | 
5 | [TestClass]
6 | public class MemoryVectorIndexTests
7 | {
8 | const string datasets = "https://raw.githubusercontent.com/nesterovsky-bros/clustering-benchmark/master/src/main/resources/datasets/";
9 | 
10 | [TestMethod]
11 | public void Test_3_3()
12 | {
13 | List<Record> records = [];
14 | 
15 | for(var i = 0; i < 3; ++i)
16 | {
17 | for(var j = 0; j < 3; ++j)
18 | {
19 | records.Add(new()
20 | {
21 | id = records.Count,
22 | tag = $"{i},{j}",
23 | vector = [i - 1, j - 1]
24 | });
25 | }
26 | }
27 | 
28 | Test("Test_3_3", records, [.5f, .9f], .6f);
29 | }
30 | 
31 | [TestMethod]
32 | public void Test_10_10()
33 | {
34 | List<Record> records = [];
35 | 
36 | for(var i = 0; i < 10; ++i)
37 | {
38 | for(var j = 0; j < 10; ++j)
39 | {
40 | records.Add(new()
41 | {
42 | id = records.Count,
43 | tag = $"{i},{j}",
44 | vector = [(i - 4.5f) / 5, (j - 4.5f) / 5]
45 | });
46 | }
47 | }
48 | 
49 | Test("Test_10_10", records, [.3f, .3f], .3f);
50 | }
51 | 
52 | [TestMethod]
53 | public void Test_100_100()
54 | {
55 | List<Record> records = [];
56 | 
57 | for(var i = 0; i < 100; ++i)
58 | {
59 | for(var j = 0; j < 100; ++j)
60 | {
61 | records.Add(new()
62 | {
63 | id = records.Count,
64 | tag = $"{i},{j}",
65 | vector = [(i - 49.5f) / 50, (j - 49.5f) / 50]
66 | });
67 | }
68 | }
69 | 
70 | Test("Test_100_100", records, [.3f, .3f], .1f);
71 | }
72 | 
73 | [TestMethod]
74 | public void Test_1000_1000()
75 | {
76 | List<Record> records = [];
77 | 
78 | for(var i = 0; i < 1000; ++i)
79 | {
80 | for(var j = 0; j < 1000; ++j)
81 | {
82 | records.Add(new()
83 | {
84 | id = records.Count,
85 | tag = $"{i},{j}",
86 | vector = [(i - 499.5f) / 500, (j - 499.5f) / 500]
87 | });
88 | }
89 | }
90 | 
91 | Test("Test_1000_1000", records, [.3f, .3f], .05f);
92 | }
93 | 
94 | [TestMethod]
95 | public void Test_100_100_NotNormalizedVectors()
96 | {
97 | List<Record> records = [];
98 | 
99 | for(var i = 0; i < 100; ++i)
100 | {
101 | for(var j = 0; j < 100; ++j)
102 | {
103 | records.Add(new()
104 | {
105 | id = records.Count,
106 | tag = $"{i},{j}",
107 | vector = [i - 1, j - 1]
108 | });
109 | }
110 | }
111 | 
112 | Test("Test_100_100_NotNormalizedVectors", records, [.3f, .3f], .3f);
113 | }
114 | 
115 | [TestMethod]
116 | public async Task Test_2d_10c()
117 | {
118 | var dataset = await Dataset.Read("artificial/2d-10c.arff");
119 | float[] point = [(73 - dataset.offsetX) / dataset.scale, (70 - dataset.offsetY) / dataset.scale];
120 | var distance = 10f / dataset.scale;
121 | 
122 | var match = Test("Test_2d_10c", dataset.records, point, distance);
123 | 
124 | var view = $"X, Y\n{string.Join(
125 | '\n',
126 | match.Select(record =>
127 | {
128 | var vector = dataset.Scale(record.vector);
129 | 
130 | return $"{vector[0]}, {vector[1]}";
131 | }))}";
132 | 
133 | Console.WriteLine(view);
134 | }
135 | 
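// The harness below cross-checks the index against a brute-force linear
// scan: every Find result must lie within the distance, nothing within
// the distance may be missed, and the number of predicate calls is
// reported as a measure of pruning efficiency.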
136 | private static List<Record> Test(
137 | string name,
138 | List<Record> records,
139 | float[] point,
140 | float distance)
141 | {
142 | var index = new MemoryVectorIndex<Record>(records, record => record.vector);
143 | 
144 | //var view = System.Text.Json.JsonSerializer.Serialize(
145 | // index.IndexHierarchy.Select(item => new
146 | // {
147 | // item.index,
148 | // item.parent,
149 | // center = item.center.ToArray(),
150 | // records = item.records?.Select(item => item.vector).ToArray()
151 | // }),
152 | // new System.Text.Json.JsonSerializerOptions()
153 | // {
154 | // WriteIndented = true
155 | // });
156 | 
157 | //Console.WriteLine(view);
158 | 
159 | Assert.AreEqual(index.Count, records.Count);
160 | 
161 | var plainMatch = records.
162 | Where(record => Distance(record.vector, point) <= distance).
163 | ToList();
164 | 
165 | var testCount = 0;
166 | 
167 | var match = index.
168 | Find(
169 | point,
170 | distance,
171 | (record, vector) =>
172 | {
173 | ++testCount;
174 | 
175 | return Distance(record.vector, vector.Span) <= distance;
176 | }).
177 | ToList();
178 | 
179 | var unmatch = records.
180 | ExceptBy(match.Select(record => record.id), record => record.id).
181 | ToList();
182 | 
183 | var invalidMatch = match.
184 | Where(record => Distance(record.vector, point) > distance).
185 | ToList();
186 | 
187 | var invalidUnmatch = unmatch.
188 | Where(record => Distance(record.vector, point) <= distance).
189 | ToList();
190 | 
191 | Console.WriteLine($"{name
192 | }:\n records: {records.Count }, distance: {distance
193 | }\n matched: {match.Count} - {
194 | (float)match.Count / records.Count:P1}\n predicate calls: {
195 | testCount} - {(float)testCount / records.Count:P1}\n predicates per match: {
196 | (float)testCount / match.Count:N1}.");
197 | 
198 | Assert.AreEqual(invalidMatch.Count, 0);
199 | Assert.AreEqual(invalidUnmatch.Count, 0);
200 | Assert.AreEqual(match.Count, plainMatch.Count);
201 | 
202 | Assert.IsTrue(!match.
203 | ExceptBy(plainMatch.Select(record => record.id), record => record.id).
204 | Any());
205 | 
206 | return match;
207 | }
208 | 
209 | private static float Distance(
210 | ReadOnlySpan<float> a,
211 | ReadOnlySpan<float> b)
212 | {
213 | var x = a[0] - b[0];
214 | var y = a[1] - b[1];
215 | 
216 | return MathF.Sqrt(x * x + y * y);
217 | }
218 | 
219 | public record struct Record
220 | {
221 | public float X => vector?[0] ?? 0;
222 | public float Y => vector?[1] ?? 0;
223 | 
224 | public int id;
225 | public string? tag;
226 | public float[] vector;
227 | }
228 | 
229 | public record Dataset
230 | {
231 | public List<Record> records = null!;
232 | public float offsetX;
233 | public float offsetY;
234 | public float scale;
235 | 
236 | public Dataset() { }
237 | 
238 | public Dataset(List<Record> records, bool normalize = true)
239 | {
240 | this.records = records;
241 | 
242 | if (!normalize)
243 | {
244 | scale = 1;
245 | 
246 | return;
247 | }
248 | 
249 | var minX = float.PositiveInfinity;
250 | var maxX = float.NegativeInfinity;
251 | var minY = float.PositiveInfinity;
252 | var maxY = float.NegativeInfinity;
253 | 
254 | foreach(var record in records)
255 | {
256 | var x = record.vector[0];
257 | var y = record.vector[1];
258 | 
259 | minX = Math.Min(minX, x);
260 | maxX = Math.Max(maxX, x);
261 | minY = Math.Min(minY, y);
262 | maxY = Math.Max(maxY, y);
263 | }
264 | 
265 | if (minX >= -1 && maxX <= 1 && minY >= -1 && maxY <= 1)
266 | {
267 | scale = 1;
268 | 
269 | return;
270 | }
271 | 
272 | if (maxX - minX <= 2 && maxY - minY <= 2)
273 | {
274 | scale = 1;
275 | offsetX = minX >= -1 && maxX <= 1 ? 0 : (minX + maxX) / 2;
276 | offsetY = minY >= -1 && maxY <= 1 ?
0 : (minY + maxY) / 2;
277 | 
278 | foreach(var record in records)
279 | {
280 | var vector = record.vector;
281 | 
282 | vector[0] -= offsetX;
283 | vector[1] -= offsetY;
284 | }
285 | 
286 | return;
287 | }
288 | else
289 | {
290 | scale = Math.Max(maxX - minX, maxY - minY) / 2;
291 | offsetX = minX >= -1 && maxX <= 1 ? 0 : (minX + maxX) / 2;
292 | offsetY = minY >= -1 && maxY <= 1 ? 0 : (minY + maxY) / 2;
293 | 
294 | foreach(var record in records)
295 | {
296 | var vector = record.vector;
297 | 
298 | vector[0] = (vector[0] - offsetX) / scale;
299 | vector[1] = (vector[1] - offsetY) / scale;
300 | }
301 | 
302 | return;
303 | }
304 | }
305 | 
306 | public static async Task<Dataset> Read(string path, bool normalize = true)
307 | {
308 | List<Record> records = [];
309 | 
310 | {
311 | using var client = new HttpClient();
312 | using var reader =
313 | new ArffReader(await client.GetStreamAsync($"{datasets}{path}"));
314 | var header = reader.ReadHeader();
315 | 
316 | while(true)
317 | {
318 | var row = reader.ReadInstance();
319 | 
320 | if (row == null)
321 | {
322 | break;
323 | }
324 | 
325 | var x = Convert.ToSingle(row[0]);
326 | var y = Convert.ToSingle(row[1]);
327 | var tag = Convert.ToString(row[2]);
328 | 
329 | records.Add(new()
330 | {
331 | id = records.Count,
332 | tag = tag,
333 | vector = [x, y]
334 | });
335 | }
336 | }
337 | 
338 | return new Dataset(records, normalize);
339 | }
340 | 
341 | public float[] Scale(float[] vector) =>
342 | [vector[0] * scale + offsetX, vector[1] * scale + offsetY];
343 | }
344 | }
-------------------------------------------------------------------------------- /MempryVectorIndex.Tests/MempryVectorIndex.Tests.csproj: --------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 | 
3 | <PropertyGroup>
4 | <TargetFramework>net8.0</TargetFramework>
5 | <ImplicitUsings>enable</ImplicitUsings>
6 | <Nullable>enable</Nullable>
7 | 
8 | <IsPackable>false</IsPackable>
9 | <IsTestProject>true</IsTestProject>
10 | </PropertyGroup>
11 | 
12 | 
13 | 
14 | <PrivateAssets>all</PrivateAssets>
15 | <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | </Project>
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # vector-database
2 | Turn SQL Server into a vector database
3 | 
4 | # Turn Akinator into a vector database!
5 | Several years ago we showed how to turn SQL Server into an Akinator-like engine. See the [KB](https://github.com/nesterovsky-bros/KB) repository.
6 | 
7 | At that time we did not know about vector databases.
8 | We just implemented a binary search index to identify an object from a set of objects by a set of properties inherent to that object.
9 | 
10 | ## Briefly
11 | Assume you have a set of objects.
12 | Assume you have a set of boolean properties.
13 | We have a matrix of `[objects x properties]` with `true` or `false` in cells.
14 | 
15 | If we present all properties as a vector `[p1, p2, p3, ..., pn]`, then we turn the original task into the task of creating an index of objects by a vector of booleans.
16 | 
17 | ## Present time vector database
18 | It is only half a step to extend a vector of booleans to a vector of floats. It is enough to say that a float can be represented as a set of bits (booleans), so all ideas of the KB database apply to a vector database.
19 | 
20 | ## Vector database
21 | ### Let's formulate the idea.
22 | 1. We have a set of vectors.
23 | 2. We want to build an index that allows us to efficiently find vectors in some vicinity of a given vector.
24 | 3. To achieve the goal we use the "divide and conquer" method.
25 | 
26 | 3.1. 
Split the whole vector space in two parts.
27 | There are multiple ways to do this, but we selected one of the simplest, readily available in SQL.
28 | We calculate a mean (`avg()`) and a standard deviation (`stdev()`) of all vectors for each dimension.
29 | For the split we select a dimension by its standard deviation (alternating between the highest and the lowest at successive levels), and split at the mean point.
30 | This gives us two subsets of vectors of similar cardinality.
31 | 
32 | 3.2. Repeat step 3.1 for each subset until it contains exactly one vector.
33 | 
34 | The height of the tree that we build this way is proportional to `Log2(N)`, where `N` is the number of vectors in the set.
35 | Estimation gives that for a set of `N` vectors the number of operations required to build such a binary index is proportional to `N*Log2(N)`.
36 | Obviously, the complexity of the algorithm is also proportional to the dimension of the vectors.
37 | 
38 | ### SQL Server
39 | SQL Server lets us store float vectors as JSON. Not the best storage type, but we go with it.
40 | Here is our vector table:
41 | 
42 | ```SQL
43 | create table dbo.Text
44 | (
45 | TextID bigint not null primary key,
46 | Text nvarchar(max) null,
47 | Vector nvarchar(max) null
48 | );
49 | ```
50 | 
51 | Please note that this table is used to bind `TextID` to `Vector` and to build the search index, but not for the search itself.
52 | 
53 | Here is the structure of the binary index:
54 | 
55 | ```SQL
56 | create table dbo.TextIndex
57 | (
58 | RangeID bigint not null primary key,
59 | Dimension smallint null,
60 | Mid real null,
61 | LowRangeID bigint null,
62 | HighRangeID bigint null,
63 | TextID bigint null
64 | );
65 | ```
66 | 
67 | The search starts from a given `vector` and a `proximity`.
68 | We start from the root (`RangeID = 0`) and compare the `Dimension` component of the input `vector ± proximity` against `Mid`.
69 | Depending on the outcome we proceed to the low (`LowRangeID`), the high (`HighRangeID`), or both ranges.
70 | We repeat the previous step with the next ranges until we locate all matched vectors.
71 | 
72 | Estimation tells us that we shall complete the search in at most `Log2(N)` steps.
73 | 
74 | ## Implementation
75 | An implementation may be worth more than theories.
76 | So, you're welcome to see it in [DDL.sql](./DDL.sql)
77 | 
78 | ## Use
79 | 1. Create a document: insert something into `dbo.Document`.
80 | 2. Populate vectors: insert something into `dbo.Text`. Note that `dbo.Text.Vector` should be a JSON array of floats.
81 | 3. Index the document: call the stored procedure `dbo.IndexDocument`.
82 | 4. Do the search: call the table valued function `dbo.Search`.
83 | 
84 | That's all.
85 | Thank you for your attention.
86 | 
87 | **P.S.:** In addition, we have implemented a similar [index builder algorithm](./VectorIndex/IndexBuilder.cs) in C#. Though it has the same asymptotic complexity `N*Log2(N)`, it works faster. So, in a more complex setup the index builder may be implemented in C#, and the search in pure SQL.
88 | 
89 | **P.P.S.:** By the same token we can implement an efficient vector index in any SQL database that supports recursive CTEs (like SQLite), or in CosmosDB as a function.
90 | 
91 | ## C#
92 | 
93 | It turned out that our initial parallel C# implementation does not scale to relatively big datasets like deep-image-96-angular, containing ~10M vectors.
94 | Though it is parallel and has `O(N*Log2(N))` complexity, it works against process/CPU data locality and produces an enormous number of page faults.
95 | Alternative data storage, like FasterKV, turned out to be too slow.
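The core computation the C# builder performs is the recursive split of steps 3.1 and 3.2. Here is a minimal in-memory sketch (hypothetical types; the level-alternating criterion and the zero-deviation tie-breaking by ID of the real builder are omitted):

```C#
// A node of the range tree: either a split (Dimension, Mid) or a leaf (Id).
record Node(int? Dimension, float? Mid, Node? Low, Node? High, long? Id);

static float StdDev(IEnumerable<float> values)
{
  var list = values.ToList();
  var mean = list.Average();

  return MathF.Sqrt(list.Sum(v => (v - mean) * (v - mean)) / list.Count);
}

static Node Build(List<(long Id, float[] Vector)> points)
{
  if (points.Count == 1)
  {
    return new(null, null, null, null, points[0].Id);
  }

  // Step 3.1: pick the dimension with the highest standard deviation
  // and split at the mean point.
  var d = Enumerable.Range(0, points[0].Vector.Length).
    MaxBy(i => StdDev(points.Select(p => p.Vector[i])));
  var mid = points.Average(p => p.Vector[d]);

  // Step 3.2: recurse into each subset until it holds a single vector.
  return new(
    d,
    mid,
    Build(points.Where(p => p.Vector[d] < mid).ToList()),
    Build(points.Where(p => p.Vector[d] >= mid).ToList()),
    null);
}
```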
96 | 
97 | So, we refactored the code from a parallel tree-level processor into a sequential tree walker.
98 | It follows steps 3.1 and 3.2 sequentially, one range at a time. See: https://github.com/nesterovsky-bros/vector-database/blob/deea9da842cb12e4edcde4e03a1e68014754d15b/VectorIndex/IndexBuilder.cs#L488.
99 | 
100 | In this mode we are able to build such an index on a laptop in just 3 minutes.
101 | 
102 | Next we want to implement benchmarks like those in https://qdrant.tech/benchmarks/, though we are not going to implement a client-server protocol yet.
103 | Still, we think we can get an idea of where we stand compared to other vector engines.
104 | 
-------------------------------------------------------------------------------- /VectorIndex.MainTest/Program.cs: --------------------------------------------------------------------------------
1 | using System.Diagnostics;
2 | 
3 | using HDF.PInvoke;
4 | 
5 | using HDF5CSharp;
6 | 
7 | using NesterovskyBros.VectorIndex;
8 | 
9 | var randomInput = GetRandomDataset((int)DateTime.Now.Ticks, 10000, 1536);
10 | 
11 | // Test a random dataset with the in-memory store.
12 | if (true)
13 | {
14 | var stopwatch = new Stopwatch();
15 | 
16 | stopwatch.Start();
17 | 
18 | var index = new Dictionary<long, RangeValue>();
19 | 
20 | await foreach(var (rangeId, range) in
21 | Test(
22 | randomInput,
23 | (_, _) => new MemoryRangeStore()))
24 | {
25 | index.Add(rangeId, range);
26 | }
27 | 
28 | stopwatch.Stop();
29 | 
30 | Console.WriteLine($"Build index: {stopwatch.Elapsed}");
31 | }
32 | 
33 | // Crafted set.
34 | if (true)
35 | {
36 | var stopwatch = new Stopwatch();
37 | 
38 | stopwatch.Start();
39 | 
40 | var index = new Dictionary<long, RangeValue>();
41 | 
42 | await foreach(var (rangeId, range) in
43 | Test(
44 | input(),
45 | (_, _) => new MemoryRangeStore()))
46 | {
47 | index.Add(rangeId, range);
48 | }
49 | 
50 | stopwatch.Stop();
51 | 
52 | Console.WriteLine($"Build index: {stopwatch.Elapsed}");
53 | 
54 | async IAsyncEnumerable<(long, Memory<float>)> input()
55 | {
56 | var dimensions = 1536;
57 | 
58 | for(var i = 0L; i < dimensions; ++i)
59 | {
60 | var vector = new float[dimensions];
61 | 
62 | vector[i] = 1;
63 | 
64 | yield return (i, vector);
65 | }
66 | }
67 | }
68 | 
69 | // Test deep-image-96-angular.hdf5
70 | if (true)
71 | {
72 | var fileName = args.Length > 0 ? args[0] : null;
73 | 
74 | if(fileName != null)
75 | {
76 | using var outputWriter = args.Length > 1 ?
File.CreateText(args[1]) : null;
77 | 
78 | if (outputWriter != null)
79 | {
80 | await outputWriter.WriteLineAsync("RangeID,Dimension,Mid,ID");
81 | }
82 | 
83 | // /train, /test
84 | var (size, dimension) = GetHdf5DatasetSize(fileName, "/train");
85 | var datasetInput = GetHdf5Dataset(fileName, "/train", size, dimension);
86 | using var store = new FileRangeStore(size, dimension, 10000);
87 | 
88 | var stopwatch = new Stopwatch();
89 | 
90 | if(args.Length > 2)
91 | {
92 | var count = 0L;
93 | using var trainWriter = File.CreateText(args[2]);
94 | 
95 | await trainWriter.WriteLineAsync("ID|Vector");
96 | 
97 | await foreach(var (id, vector) in datasetInput)
98 | {
99 | await trainWriter.WriteLineAsync($"{id}|{string.Join(',', vector.ToArray())}");
100 | 
101 | ++count;
102 | 
103 | if(count % 100000 == 0)
104 | {
105 | Console.WriteLine($"Processed {count} records.");
106 | }
107 | }
108 | }
109 | 
110 | if (args.Length > 3)
111 | {
112 | var count = 0L;
113 | var (testSize, testDimension) = GetHdf5DatasetSize(fileName, "/test");
114 | var testDataset = GetHdf5Dataset(fileName, "/test", testSize, testDimension);
115 | 
116 | using var testWriter = File.CreateText(args[3]);
117 | 
118 | await testWriter.WriteLineAsync("ID|Vector");
119 | 
120 | await foreach(var (id, vector) in testDataset)
121 | {
122 | await testWriter.WriteLineAsync($"{id}|{string.Join(',', vector.ToArray())}");
123 | 
124 | ++count;
125 | 
126 | if(count % 100000 == 0)
127 | {
128 | Console.WriteLine($"Processed {count} records.");
129 | }
130 | }
131 | }
132 | 
133 | stopwatch.Start();
134 | 
135 | var index = new Dictionary<long, RangeValue>();
136 | 
137 | await foreach(var (rangeId, range) in
138 | Test(
139 | datasetInput,
140 | //(_, _) => new MemoryRangeStore(),
141 | store.NextStore))
142 | {
143 | index.Add(rangeId, range);
144 | 
145 | if (outputWriter != null)
146 | {
147 | await outputWriter.WriteLineAsync(
148 | $"{rangeId},{range.Dimension},{range.Mid},{range.Id}");
149 | }
150 | }
151 | 
152 | stopwatch.Stop();
153 | 
154 | Console.WriteLine($"Build index: {stopwatch.Elapsed}, ranges: {index.Count}");
155 | }
156 | }
157 | 
158 | IAsyncEnumerable<(long rangeId, RangeValue range)> Test(
159 | IAsyncEnumerable<(long id, Memory<float> vector)> input,
160 | Func<long, long, IRangeStore> storeFactory) =>
161 | IndexBuilder.Build(input, storeFactory);
162 | 
163 | async IAsyncEnumerable<(long id, Memory<float> vector)> GetRandomDataset(
164 | int seed,
165 | long count,
166 | short dimensions)
167 | {
168 | var random = new Random(seed);
169 | 
170 | for(var i = 0L; i < count; ++i)
171 | {
172 | var vector = new float[dimensions];
173 | 
174 | for(var j = 0; j < vector.Length; ++j)
175 | {
176 | vector[j] = random.NextSingle() * 2 - 1;
177 | }
178 | 
179 | yield return (i, vector);
180 | }
181 | }
182 | 
183 | (long count, short dimensions) GetHdf5DatasetSize(
184 | string fileName,
185 | string datasetName)
186 | {
187 | var fileId = Hdf5.OpenFile(fileName, true);
188 | var datasetId = H5D.open(fileId, Hdf5Utils.NormalizedName(datasetName));
189 | 
190 | try
191 | {
192 | var spaceId = H5D.get_space(datasetId);
193 | 
194 | try
195 | {
196 | int rank = H5S.get_simple_extent_ndims(spaceId);
197 | 
198 | if(rank != 2)
199 | {
200 | throw new InvalidOperationException("Invalid rank.");
201 | }
202 | 
203 | ulong[] maxDims = new ulong[rank];
204 | ulong[] dims = new ulong[rank];
205 | 
206 | H5S.get_simple_extent_dims(spaceId, dims, maxDims);
207 | 
208 | return (checked((long)maxDims[0]), checked((short)maxDims[1]));
209 | }
210 | finally
211 | {
212 | H5S.close(spaceId);
213 | }
214 | }
215 | 
finally
216 | {
217 | H5D.close(datasetId);
218 | }
219 | }
220 | 
221 | async IAsyncEnumerable<(long id, Memory<float> vector)> GetHdf5Dataset(
222 | string fileName,
223 | string datasetName,
224 | long size,
225 | short dimension)
226 | {
227 | var index = 0L;
228 | var step = 100000;
229 | var fileId = Hdf5.OpenFile(fileName, true);
230 | 
231 | try
232 | {
233 | while(index < size)
234 | {
235 | var rows = Hdf5.ReadDataset<float>(
236 | fileId,
237 | datasetName,
238 | checked((ulong)index),
239 | Math.Min(checked((ulong)(index + step - 1)), checked((ulong)(size - 1))));
240 | 
241 | var count = rows.GetLength(0);
242 | 
243 | for(var i = 0; i < count; i++)
244 | {
245 | var row = new float[dimension];
246 | 
247 | for(var j = 0; j < row.Length; ++j)
248 | {
249 | row[j] = rows[i, j];
250 | }
251 | 
252 | yield return (index++, row);
253 | }
254 | }
255 | }
256 | finally
257 | {
258 | Hdf5.CloseFile(fileId);
259 | }
260 | }
261 | 
-------------------------------------------------------------------------------- /VectorIndex.MainTest/VectorIndex.MainTest.csproj: --------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 | 
3 | <PropertyGroup>
4 | <OutputType>Exe</OutputType>
5 | <TargetFramework>net8.0</TargetFramework>
6 | <ImplicitUsings>enable</ImplicitUsings>
7 | <Nullable>enable</Nullable>
8 | </PropertyGroup>
9 | 
10 | 
11 | False
12 | 
13 | 
14 | 
15 | False
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | </Project>
-------------------------------------------------------------------------------- /VectorIndex/FileRangeStore.cs: --------------------------------------------------------------------------------
1 | using System.IO.MemoryMappedFiles;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 | 
5 | namespace NesterovskyBros.VectorIndex;
6 | 
7 | /// <summary>
8 | /// A memory mapped file as a store for building vector index.
9 | /// </summary>
10 | public class FileRangeStore: IDisposable
11 | {
12 | /// <summary>
13 | /// Creates a <see cref="FileRangeStore"/> instance.
14 | /// </summary>
15 | /// <param name="count">Number of vectors.</param>
16 | /// <param name="dimensions">Dimension of vectors.</param>
17 | /// <param name="buffer">A buffer size.</param>
18 | public FileRangeStore(long count, short dimensions, int buffer = 10000)
19 | {
20 | this.dimensions = dimensions;
21 | this.buffer = buffer;
22 | capacity = checked(
23 | (Marshal.SizeOf<long>() + Marshal.SizeOf<float>() * dimensions) *
24 | 4 * count);
25 | highOffset = capacity / 2;
26 | file = MemoryMappedFile.CreateNew(null, capacity);
27 | }
28 | 
29 | /// <summary>
30 | /// Releases resources.
31 | /// </summary>
32 | public void Dispose() => file.Dispose();
33 | 
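// Allocation note: the mapped file is used as two bump allocators, one
// per half. Low children (odd range ids) write from lowOffset at the
// start of the file, high children (even range ids) from highOffset at
// the middle; disposing a store releases its region by resetting the
// corresponding offset.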
46 |   public IRangeStore NextStore(
47 |     long rangeId,
48 |     long capacity) =>
49 |     new RangeStore(this, rangeId, capacity);
50 | 
51 |   private class RangeStore: IRangeStore
52 |   {
53 |     public RangeStore(
54 |       FileRangeStore container,
55 |       long rangeId,
56 |       long capacity)
57 |     {
58 |       this.container = container;
59 |       this.rangeId = rangeId;
60 |       this.capacity = capacity;
61 |     }
62 | 
63 |     public ValueTask Add(long id, Memory<float> vector)
64 |     {
65 |       if (vector.Length != container.dimensions)
66 |       {
67 |         throw new ArgumentException(
68 |           "Invalid length of vector.",
69 |           nameof(vector));
70 |       }
71 | 
72 |       // Spill the buffered batch to the mapped file when it is full.
73 |       if (data.Count >= container.buffer)
74 |       {
75 |         Flush();
76 |       }
77 | 
78 |       data.Add((id, vector));
79 |       ++count;
80 | 
81 |       return ValueTask.CompletedTask;
82 |     }
83 | 
84 | #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
85 |     public async IAsyncEnumerable<(long id, Memory<float> vector)> GetPoints()
86 | #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
87 |     {
88 |       // Replay the flushed prefix from the mapped file, then the
89 |       // still-buffered tail from memory.
90 |       if (stream != null)
91 |       {
92 |         stream.Position = 0;
93 | 
94 |         for(var i = 0L; i < count - data.Count; ++i)
95 |         {
96 |           var id = 0L;
97 |           var vector = new float[container.dimensions];
98 | 
99 |           stream.Read(MemoryMarshal.CreateSpan(
100 |             ref Unsafe.As<long, byte>(ref id),
101 |             Marshal.SizeOf<long>()));
102 |           stream.Read(
103 |             MemoryMarshal.CreateSpan(
104 |               ref Unsafe.As<float, byte>(ref vector[0]),
105 |               Marshal.SizeOf<float>() * container.dimensions));
106 | 
107 |           yield return (id, vector);
108 |         }
109 |       }
110 | 
111 |       foreach(var item in data)
112 |       {
113 |         yield return item;
114 |       }
115 |     }
116 | 
117 |     public ValueTask DisposeAsync()
118 |     {
119 |       if (stream != null)
120 |       {
121 |         stream.Dispose();
122 | 
123 |         // Return this store's region to its half of the file.
124 |         if ((rangeId & 1) != 0)
125 |         {
126 |           container.lowOffset = start;
127 |         }
128 |         else
129 |         {
130 |           container.highOffset = start;
131 |         }
132 |       }
133 | 
134 |       return ValueTask.CompletedTask;
135 |     }
136 | 
137 |     private void Flush()
138 |     {
139 |       if (stream == null)
140 |       {
141 |         start =
142 |           (rangeId & 1) != 0 ? container.lowOffset : container.highOffset;
143 | 
144 |         stream = container.file.CreateViewStream(
145 |           start,
146 |           (Marshal.SizeOf<long>() +
147 |             Marshal.SizeOf<float>() * container.dimensions) * capacity);
148 |       }
149 | 
150 |       // Records are fixed size: an 8 byte id followed by
151 |       // dimensions * 4 bytes of vector data.
152 |       foreach(var (id, vector) in data)
153 |       {
154 |         var idRef = id;
155 | 
156 |         stream.Write(MemoryMarshal.CreateSpan(
157 |           ref Unsafe.As<long, byte>(ref idRef),
158 |           Marshal.SizeOf<long>()));
159 |         stream.Write(
160 |           MemoryMarshal.CreateSpan(
161 |             ref Unsafe.As<float, byte>(ref vector.Span[0]),
162 |             Marshal.SizeOf<float>() * container.dimensions));
163 |       }
164 | 
165 |       data.Clear();
166 | 
167 |       var offset = start + stream.Position;
168 | 
169 |       if ((rangeId & 1) != 0)
170 |       {
171 |         container.lowOffset = offset;
172 |       }
173 |       else
174 |       {
175 |         container.highOffset = offset;
176 |       }
177 |     }
178 | 
179 |     private readonly long rangeId;
180 |     private readonly FileRangeStore container;
181 |     private readonly List<(long id, Memory<float> vector)> data = new();
182 |     private readonly long capacity;
183 |     private long start;
184 |     private long count;
185 |     private Stream? stream;
186 |   }
187 | 
188 |   private readonly int buffer;
189 |   private readonly short dimensions;
190 |   private readonly long capacity;
191 |   private readonly MemoryMappedFile file;
192 |   private long lowOffset;
193 |   private long highOffset;
194 | }
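195 | 
196 | // An illustrative sketch, not part of the original library: it shows the
197 | // intended lifecycle of child stores when a parent range is split,
198 | // following the rangeId * 2 + 1 (low) / rangeId * 2 + 2 (high) numbering
199 | // used by IndexBuilder. The split predicate (first component > 0) is an
200 | // arbitrary stand-in for the real mid-point test; in IndexBuilder the
201 | // children stay alive on a stack until they are themselves split.
202 | internal static class FileRangeStoreExample
203 | {
204 |   public static async Task SplitAsync(
205 |     FileRangeStore container,
206 |     IRangeStore parent,
207 |     long rangeId,
208 |     long count)
209 |   {
210 |     await using var low = container.NextStore(rangeId * 2 + 1, count);
211 |     await using var high = container.NextStore(rangeId * 2 + 2, count);
212 | 
213 |     await foreach(var (id, vector) in parent.GetPoints())
214 |     {
215 |       await (vector.Span[0] > 0 ? high : low).Add(id, vector);
216 |     }
217 |   }
218 | }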
--------------------------------------------------------------------------------
/VectorIndex/IRangeStore.cs:
--------------------------------------------------------------------------------
1 | namespace NesterovskyBros.VectorIndex;
2 | 
3 | /// <summary>
4 | /// An interface encapsulating a range store.
5 | /// </summary>
6 | public interface IRangeStore: IAsyncDisposable
7 | {
8 |   /// <summary>
9 |   /// Adds a point to the store.
10 |   /// </summary>
11 |   /// <param name="id">A vector id.</param>
12 |   /// <param name="vector">A vector.</param>
13 |   /// <returns>A value task.</returns>
14 |   ValueTask Add(long id, Memory<float> vector);
15 | 
16 |   /// <summary>
17 |   /// Gets an async enumerable of stored points.
18 |   /// </summary>
19 |   /// <returns>An enumerable of vectors in the range.</returns>
20 |   IAsyncEnumerable<(long id, Memory<float> vector)> GetPoints();
21 | }
--------------------------------------------------------------------------------
/VectorIndex/IndexBuilder.cs:
--------------------------------------------------------------------------------
1 | using System.Numerics;
2 | 
3 | namespace NesterovskyBros.VectorIndex;
4 | 
5 | /// <summary>
6 | /// An API to build vector index.
7 | /// </summary>
8 | public partial class IndexBuilder
9 | {
10 |   /// <summary>
11 |   /// Gets range enumerations of points.
12 |   /// </summary>
13 |   /// <param name="points">A points enumeration.</param>
14 |   /// <param name="storeFactory">
15 |   /// A factory to create a temporary store of points. Called as:
16 |   /// storeFactory(rangeId, capacity).
17 |   /// </param>
18 |   /// <returns>An async enumerable of (rangeId, range) pairs.</returns>
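19 |   /// <remarks>
20 |   /// The algorithm is iterative: pending ranges sit on an explicit stack.
21 |   /// For every range a single streaming pass computes per-dimension mean
22 |   /// and variance with Welford's algorithm, a splitting dimension is
23 |   /// selected by variance, a <see cref="RangeValue"/> is emitted, and the
24 |   /// points are distributed into two child stores numbered
25 |   /// rangeId * 2 + 1 (low) and rangeId * 2 + 2 (high).
26 |   /// </remarks>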
27 |   public static async IAsyncEnumerable<(long rangeId, RangeValue range)> Build(
28 |     IAsyncEnumerable<(long id, Memory<float> vector)> points,
29 |     Func<long, long, IRangeStore> storeFactory)
30 |   {
31 |     var iteration = 0L;
32 |     var level = 0;
33 | 
34 |     Stats[]? stats = null;
35 |     Stack<(long rangeId, IRangeStore store, bool max)> stack = new();
36 | 
37 |     stack.Push((0, new RangeStore { points = points }, true));
38 | 
39 |     try
40 |     {
41 |       while(stack.TryPop(out var item))
42 |       {
43 |         try
44 |         {
45 |           ++iteration;
46 | 
47 |           // Depth of the current range in the binary tree of ranges.
48 |           level = Math.Max(
49 |             level,
50 |             64 - BitOperations.LeadingZeroCount((ulong)item.rangeId));
51 | 
52 |           if (iteration < 10 ||
53 |             iteration < 1000 && iteration % 100 == 0 ||
54 |             iteration < 10000 && iteration % 1000 == 0 ||
55 |             iteration % 10000 == 0)
56 |           {
57 |             Console.WriteLine($"Process {iteration} ranges. Level {level}");
58 |           }
59 | 
60 |           var count = 0L;
61 | 
62 |           // First pass: accumulate per-dimension stats over the range.
63 |           await foreach(var (id, vector) in item.store.GetPoints())
64 |           {
65 |             if (count++ == 0)
66 |             {
67 |               stats ??= new Stats[vector.Length];
68 |               InitStats(id, vector);
69 |             }
70 |             else
71 |             {
72 |               UpdateStats(id, vector);
73 |             }
74 |           }
75 | 
76 |           if (count == 0)
77 |           {
78 |             continue;
79 |           }
80 | 
81 |           var max = item.max;
82 | 
83 |           // Alternate between the dimension of largest and of smallest
84 |           // variance on successive levels.
85 |           var (match, index) = stats!.
86 |             Select((value, position) => (value, position)).
87 |             MaxBy(pair => max ? pair.value.Stdev2N : -pair.value.Stdev2N);
88 | 
89 |           RangeValue range = count == 1 ?
90 |             new() { Dimension = -1, Id = (long)stats![0].IdN } :
91 |             new()
92 |             {
93 |               Dimension = index,
94 |               Mid = match.Mean,
95 |               Id = (long)(match.IdN / match.Count)
96 |             };
97 | 
98 |           var rangeId = item.rangeId;
99 | 
100 |           yield return (rangeId, range);
101 | 
102 |           if (count == 1)
103 |           {
104 |             continue;
105 |           }
106 | 
107 |           var lowRangeId = checked(rangeId * 2 + 1);
108 |           var low = storeFactory(lowRangeId, count);
109 | 
110 |           try
111 |           {
112 |             var highRangeId = checked(rangeId * 2 + 2);
113 |             var high = storeFactory(highRangeId, count);
114 | 
115 |             //var lowCount = 0L;
116 | 
117 |             try
118 |             {
119 |               // Second pass: distribute the points into the child stores;
120 |               // ties at the mid value are broken by id.
121 |               await foreach(var (id, vector) in item.store.GetPoints())
122 |               {
123 |                 var value = vector.Span[range.Dimension];
124 | 
125 |                 if (value > range.Mid || value == range.Mid && id > range.Id)
126 |                 {
127 |                   await high.Add(id, vector);
128 |                 }
129 |                 else
130 |                 {
131 |                   await low.Add(id, vector);
132 |                   //++lowCount;
133 |                 }
134 |               }
135 | 
136 |               //stack.Push((lowRangeId, low, lowCount < count * 0.70710678118654752440084436210485));
137 |               //stack.Push((highRangeId, high, lowCount > count * (1 - 0.70710678118654752440084436210485)));
138 |               stack.Push((lowRangeId, low, !max));
139 |               stack.Push((highRangeId, high, !max));
140 |             }
141 |             catch
142 |             {
143 |               await high.DisposeAsync();
144 | 
145 |               throw;
146 |             }
147 |           }
148 |           catch
149 |           {
150 |             await low.DisposeAsync();
151 | 
152 |             throw;
153 |           }
154 |         }
155 |         finally
156 |         {
157 |           await item.store.DisposeAsync();
158 |         }
159 |       }
160 |     }
161 |     finally
162 |     {
163 |       // Release any stores left on the stack if enumeration stops early.
164 |       while(stack.TryPop(out var item))
165 |       {
166 |         await item.store.DisposeAsync();
167 |       }
168 |     }
169 | 
170 |     void InitStats(long id, Memory<float> point)
171 |     {
172 |       var span = point.Span;
173 | 
174 |       for(var i = 0; i < span.Length; ++i)
175 |       {
176 |         stats![i] = new()
177 |         {
178 |           Mean = span[i],
179 |           Stdev2N = 0,
180 |           Count = 1,
181 |           IdN = id
182 |         };
183 |       }
184 |     }
185 | 
186 |     void UpdateStats(long id, Memory<float> point)
187 |     {
188 |       var span = point.Span;
189 | 
190 |       for(var i = 0; i < span.Length; ++i)
191 |       {
192 |         var value = span[i];
193 |         ref var item = ref stats![i];
194 |         var pa = item.Mean;
195 |         var pq = item.Stdev2N;
196 |         var count = item.Count + 1;
197 | 
198 |         // Welford's streaming update: a is the new mean, q accumulates
199 |         // sum((x - mean)^2), so the variance is q / count.
200 |         var a = pa + (value - pa) / count;
201 |         var q = pq + (value - pa) * (value - a);
202 | 
203 |         item = new()
204 |         {
205 |           Mean = a,
206 |           Stdev2N = q,
207 |           Count = count,
208 |           IdN = item.IdN + id
209 |         };
210 |       }
211 |     }
212 |   }
213 | 
214 |   private class RangeStore: IRangeStore
215 |   {
216 |     public IAsyncEnumerable<(long id, Memory<float> vector)> points = null!;
217 | 
218 |     public ValueTask DisposeAsync() => ValueTask.CompletedTask;
219 | 
220 |     public ValueTask Add(long id, Memory<float> vector) =>
221 |       throw new NotImplementedException();
222 | 
223 |     public IAsyncEnumerable<(long id, Memory<float> vector)> GetPoints() =>
224 |       points;
225 |   }
226 | }
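227 | 
228 | // An illustrative sketch, not part of the original library: builds an
229 | // index over a tiny in-memory dataset with MemoryRangeStore and prints
230 | // the resulting ranges. The points and the Run entry point are invented
231 | // for this example.
232 | internal static class IndexBuilderExample
233 | {
234 |   public static async Task Run()
235 |   {
236 |     var points = GetPoints(new float[][]
237 |     {
238 |       new[] { 0.1f, 0.9f },
239 |       new[] { 0.8f, 0.2f },
240 |       new[] { 0.4f, 0.5f }
241 |     });
242 | 
243 |     await foreach(var (rangeId, range) in
244 |       IndexBuilder.Build(points, (_, _) => new MemoryRangeStore()))
245 |     {
246 |       Console.WriteLine(
247 |         $"range {rangeId}: dimension {range.Dimension}, mid {range.Mid}, id {range.Id}");
248 |     }
249 |   }
250 | 
251 | #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
252 |   private static async IAsyncEnumerable<(long id, Memory<float> vector)> GetPoints(
253 |     float[][] vectors)
254 | #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
255 |   {
256 |     for(var i = 0; i < vectors.Length; ++i)
257 |     {
258 |       yield return (i, vectors[i]);
259 |     }
260 |   }
261 | }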
--------------------------------------------------------------------------------
/VectorIndex/MemoryRangeStore.cs:
--------------------------------------------------------------------------------
1 | namespace NesterovskyBros.VectorIndex;
2 | 
3 | /// <summary>
4 | /// A memory implementation of <see cref="IRangeStore"/>.
5 | /// Note that instances of this class are not thread safe.
6 | /// </summary>
7 | public class MemoryRangeStore: IRangeStore
8 | {
9 |   /// <inheritdoc/>
10 |   public ValueTask DisposeAsync() => ValueTask.CompletedTask;
11 | 
12 |   /// <inheritdoc/>
13 |   public ValueTask Add(long id, Memory<float> vector)
14 |   {
15 |     data.Add((id, vector));
16 | 
17 |     return ValueTask.CompletedTask;
18 |   }
19 | 
20 |   /// <inheritdoc/>
21 | #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
22 |   public async IAsyncEnumerable<(long id, Memory<float> vector)> GetPoints()
23 | #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
24 |   {
25 |     foreach(var item in data)
26 |     {
27 |       yield return item;
28 |     }
29 |   }
30 | 
31 |   private readonly List<(long id, Memory<float> vector)> data = new();
32 | }
--------------------------------------------------------------------------------
/VectorIndex/RangeValue.cs:
--------------------------------------------------------------------------------
1 | namespace NesterovskyBros.VectorIndex;
2 | 
3 | /// <summary>
4 | /// A range splitting space by a specified dimension into two subregions.
5 | /// </summary>
6 | /// <remarks>
7 | /// A value with Dimension == -1 denotes a leaf range whose Id is the id
8 | /// of the single point it contains; otherwise Id is the mean point id
9 | /// used to break ties at the mid value.
10 | /// </remarks>
11 | public readonly record struct RangeValue
12 | {
13 |   /// <summary>
14 |   /// Index of the dimension being indexed.
15 |   /// </summary>
16 |   public int Dimension { get; init; }
17 | 
18 |   /// <summary>
19 |   /// A middle point of the range.
20 |   /// </summary>
21 |   public float Mid { get; init; }
22 | 
23 |   /// <summary>
24 |   /// An optional point id that fits into the range.
25 |   /// </summary>
26 |   public long Id { get; init; }
27 | }
--------------------------------------------------------------------------------
/VectorIndex/Stats.cs:
--------------------------------------------------------------------------------
1 | namespace NesterovskyBros.VectorIndex;
2 | 
3 | /// <summary>
4 | /// Aggregated statistics for a single dimension.
5 | /// </summary>
6 | public readonly record struct Stats
7 | {
8 |   /// <summary>
9 |   /// A mean value.
10 |   /// </summary>
11 |   public float Mean { get; init; }
12 | 
13 |   /// <summary>
14 |   /// A stdev^2*N value.
15 |   /// </summary>
16 |   public float Stdev2N { get; init; }
17 | 
18 |   /// <summary>
19 |   /// Number of items collected.
20 |   /// </summary>
21 |   public long Count { get; init; }
22 | 
23 |   /// <summary>
24 |   /// Sum of ids, used to get the mean id value.
25 |   /// </summary>
26 |   public Int128 IdN { get; init; }
27 | }
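28 | 
29 | // A worked example of these fields (illustrative, not from the original
30 | // file): after observing the values 1, 2 and 3 in one dimension,
31 | // Mean == 2 and Stdev2N == (1-2)^2 + (2-2)^2 + (3-2)^2 == 2, so the
32 | // variance is Stdev2N / Count == 2/3. IdN accumulates ids, so
33 | // IdN / Count is the mean id of the range.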
--------------------------------------------------------------------------------
/VectorIndex/VectorIndex.csproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 | 
3 |   <PropertyGroup>
4 |     <TargetFramework>net8.0</TargetFramework>
5 |     <ImplicitUsings>enable</ImplicitUsings>
6 |     <Nullable>enable</Nullable>
7 |     <!-- Assumption: RootNamespace and AssemblyName are guesses at the tags
8 |          lost in extraction; the values themselves survive in the dump. -->
9 |     <RootNamespace>NesterovskyBros.VectorIndex</RootNamespace>
10 |     <AssemblyName>VectorIndex</AssemblyName>
11 |   </PropertyGroup>
12 | 
13 | </Project>
--------------------------------------------------------------------------------
/vector-database.sln:
--------------------------------------------------------------------------------
1 | 
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.5.33530.505
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VectorIndex", "VectorIndex\VectorIndex.csproj", "{B41A62A3-7E59-4030-8777-37AA72790EDB}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VectorIndex.MainTest", "VectorIndex.MainTest\VectorIndex.MainTest.csproj", "{04DC6990-8492-41A6-9C0A-19C39B315919}"
9 | EndProject
10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Files", "Files", "{D7D7DEE3-EFCB-486D-B61F-8E2DAD0466E4}"
11 | 	ProjectSection(SolutionItems) = preProject
12 | 		DDL.sql = DDL.sql
13 | 		LICENSE = LICENSE
14 | 		README.md = README.md
15 | 	EndProjectSection
16 | EndProject
17 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MemoryVectorIndex", "MemoryVectorIndex\MemoryVectorIndex.csproj", "{C6F45BC6-752D-4B54-86E8-4842254FA110}"
18 | EndProject
19 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MempryVectorIndex.Tests", "MempryVectorIndex.Tests\MempryVectorIndex.Tests.csproj", "{746E60D8-757E-4C27-908C-AA42F8E4A458}"
20 | EndProject
21 | Global
22 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
23 | 		Debug|Any CPU = Debug|Any CPU
24 | 		Release|Any CPU = Release|Any CPU
25 | 	EndGlobalSection
26 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
27 | 		{B41A62A3-7E59-4030-8777-37AA72790EDB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
28 | 		{B41A62A3-7E59-4030-8777-37AA72790EDB}.Debug|Any CPU.Build.0 = Debug|Any CPU
29 | 		{B41A62A3-7E59-4030-8777-37AA72790EDB}.Release|Any CPU.ActiveCfg = Release|Any CPU
30 | 		{B41A62A3-7E59-4030-8777-37AA72790EDB}.Release|Any CPU.Build.0 = Release|Any CPU
31 | 		{04DC6990-8492-41A6-9C0A-19C39B315919}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
32 | 		{04DC6990-8492-41A6-9C0A-19C39B315919}.Debug|Any CPU.Build.0 = Debug|Any CPU
33 | 		{04DC6990-8492-41A6-9C0A-19C39B315919}.Release|Any CPU.ActiveCfg = Release|Any CPU
34 | 		{04DC6990-8492-41A6-9C0A-19C39B315919}.Release|Any CPU.Build.0 = Release|Any CPU
35 | 		{C6F45BC6-752D-4B54-86E8-4842254FA110}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
36 | 		{C6F45BC6-752D-4B54-86E8-4842254FA110}.Debug|Any CPU.Build.0 = Debug|Any CPU
37 | 		{C6F45BC6-752D-4B54-86E8-4842254FA110}.Release|Any CPU.ActiveCfg = Release|Any CPU
38 | 		{C6F45BC6-752D-4B54-86E8-4842254FA110}.Release|Any CPU.Build.0 = Release|Any CPU
39 | 		{746E60D8-757E-4C27-908C-AA42F8E4A458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
40 | 		{746E60D8-757E-4C27-908C-AA42F8E4A458}.Debug|Any CPU.Build.0 = Debug|Any CPU
41 | 		{746E60D8-757E-4C27-908C-AA42F8E4A458}.Release|Any CPU.ActiveCfg = Release|Any CPU
42 | 		{746E60D8-757E-4C27-908C-AA42F8E4A458}.Release|Any CPU.Build.0 = Release|Any CPU
43 | 	EndGlobalSection
44 | 	GlobalSection(SolutionProperties) = preSolution
45 | 		HideSolutionNode = FALSE
46 | 	EndGlobalSection
47 | 	GlobalSection(ExtensibilityGlobals) = postSolution
48 | 		SolutionGuid = 
{CF23CC1E-AFB8-4517-B126-F03D7F2B5311} 49 | EndGlobalSection 50 | EndGlobal 51 | --------------------------------------------------------------------------------