├── .devcontainer
├── devcontainer.json
└── docker-compose.yml
├── .gitattributes
├── .github
├── CODE_OF_CONDUCT.md
├── ISSUE_TEMPLATE.md
└── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .vscode
└── launch.json
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── _assets
├── azure-sql-cosine-similarity-vector-type.gif
├── sql-aca-free-tiers.png
├── sql-kmeans-1.png
├── sql-kmeans-2.png
├── sql-kmeans-3.png
├── sql-kmeans-performance.gif
└── wikipedia-voronoi_diagram.png
├── azure.yaml
├── infra
├── core
│ ├── ai
│ │ └── cognitiveservices.bicep
│ ├── database
│ │ ├── cosmos
│ │ │ ├── cosmos-account.bicep
│ │ │ ├── mongo
│ │ │ │ ├── cosmos-mongo-account.bicep
│ │ │ │ └── cosmos-mongo-db.bicep
│ │ │ └── sql
│ │ │ │ ├── cosmos-sql-account.bicep
│ │ │ │ ├── cosmos-sql-db.bicep
│ │ │ │ ├── cosmos-sql-role-assign.bicep
│ │ │ │ └── cosmos-sql-role-def.bicep
│ │ ├── postgresql
│ │ │ └── flexibleserver.bicep
│ │ └── sqlserver
│ │ │ └── sqlserver.bicep
│ ├── gateway
│ │ ├── apim-api-policy.xml
│ │ └── apim.bicep
│ ├── host
│ │ ├── aks-agent-pool.bicep
│ │ ├── aks-managed-cluster.bicep
│ │ ├── aks.bicep
│ │ ├── appservice-appsettings.bicep
│ │ ├── appservice.bicep
│ │ ├── appserviceplan.bicep
│ │ ├── container-app-upsert.bicep
│ │ ├── container-app.bicep
│ │ ├── container-apps-environment.bicep
│ │ ├── container-apps.bicep
│ │ ├── container-registry.bicep
│ │ ├── functions.bicep
│ │ └── staticwebapp.bicep
│ ├── monitor
│ │ ├── applicationinsights-dashboard.bicep
│ │ ├── applicationinsights.bicep
│ │ ├── loganalytics.bicep
│ │ └── monitoring.bicep
│ ├── networking
│ │ ├── cdn-endpoint.bicep
│ │ ├── cdn-profile.bicep
│ │ └── cdn.bicep
│ ├── search
│ │ └── search-services.bicep
│ ├── security
│ │ ├── keyvault-access.bicep
│ │ ├── keyvault-secret.bicep
│ │ ├── keyvault.bicep
│ │ ├── registry-access.bicep
│ │ └── role.bicep
│ └── storage
│ │ └── storage-account.bicep
├── kmeans.bicep
├── kmeans.parameters.json
├── main.bicep
└── main.parameters.json
├── sample-data
├── 01-import-data.sql
├── 02-use-native-vectors.sql
└── readme.md
└── src
├── .dockerignore
├── .env.sample
├── Dockerfile
├── db
├── database.py
├── index.py
├── kmeans.py
└── utils.py
├── internals.py
├── main.py
├── requirements.txt
└── sql
├── 00-create-user.sql
├── 01-sample-queries.sql
├── 02-kmeans-status.sql
├── 03-find-vector-cluster.sql
├── 04-add-new-vector.sql
└── 05-remove-vector.sql
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "dockerComposeFile": "docker-compose.yml",
3 | "service": "app",
4 | "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
5 | "customizations": {
6 | "vscode": {
7 | "extensions": [
8 | "ms-python.python",
9 | "ms-vscode.azurecli"
10 | ],
11 | "settings": {
12 | "python.defaultInterpreterPath": "/opt/conda/bin/python"
13 | }
14 | }
15 | },
16 | "forwardPorts": [
17 | 3000
18 | ]
19 | }
20 |
--------------------------------------------------------------------------------
/.devcontainer/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 | app:
5 | build:
6 | context: ../src
7 | dockerfile: Dockerfile
8 | volumes:
9 | - ../..:/workspaces:cached
10 | command: sleep infinity
11 |
12 | # Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
13 | # user: root
14 |
15 | # Use "forwardPorts" in **devcontainer.json** to forward an app port locally.
16 | # (Adding the "ports" property to this file will not forward from a Codespace.)
17 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Thanks to: https://rehansaeed.com/gitattributes-best-practices/
2 |
3 | # Set default behavior to automatically normalize line endings.
4 | * text=auto
5 |
6 | # Force batch scripts to always use CRLF line endings so that if a repo is accessed
7 | # in Windows via a file share from Linux, the scripts will work.
8 | *.{cmd,[cC][mM][dD]} text eol=crlf
9 | *.{bat,[bB][aA][tT]} text eol=crlf
10 |
11 | # Force bash scripts to always use LF line endings so that if a repo is accessed
12 | # in Unix via a file share from Windows, the scripts will work.
13 | *.sh text eol=lf
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
4 | > Please provide us with the following information:
5 | > ---------------------------------------------------------------
6 |
7 | ### This issue is for a: (mark with an `x`)
8 | ```
9 | - [ ] bug report -> please search issues before submitting
10 | - [ ] feature request
11 | - [ ] documentation issue or request
12 | - [ ] regression (a behavior that used to work and stopped in a new release)
13 | ```
14 |
15 | ### Minimal steps to reproduce
16 | >
17 |
18 | ### Any log messages given by the failure
19 | >
20 |
21 | ### Expected/desired behavior
22 | >
23 |
24 | ### OS and Version?
25 | > Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?)
26 |
27 | ### Versions
28 | >
29 |
30 | ### Mention any other details that might be useful
31 |
32 | > ---------------------------------------------------------------
33 | > Thanks! We'll be in touch soon.
34 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Purpose
2 |
3 | * ...
4 |
5 | ## Does this introduce a breaking change?
6 |
7 | ```
8 | [ ] Yes
9 | [ ] No
10 | ```
11 |
12 | ## Pull Request Type
13 | What kind of change does this Pull Request introduce?
14 |
15 |
16 | ```
17 | [ ] Bugfix
18 | [ ] Feature
19 | [ ] Code style update (formatting, local variables)
20 | [ ] Refactoring (no functional changes, no api changes)
21 | [ ] Documentation content changes
22 | [ ] Other... Please describe:
23 | ```
24 |
25 | ## How to Test
26 | * Get the code
27 |
28 | ```
29 | git clone [repo-address]
30 | cd [repo-name]
31 | git checkout [branch-name]
32 | pip install -r src/requirements.txt
33 | ```
34 |
35 | * Test the code
36 |
37 | ```
38 | ```
39 |
40 | ## What to Check
41 | Verify that the following are valid
42 | * ...
43 |
44 | ## Other Information
45 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Ll]og/
33 | [Ll]ogs/
34 |
35 | # Visual Studio 2015/2017 cache/options directory
36 | .vs/
37 | # Uncomment if you have tasks that create the project's static files in wwwroot
38 | #wwwroot/
39 |
40 | # Visual Studio 2017 auto generated files
41 | Generated\ Files/
42 |
43 | # MSTest test Results
44 | [Tt]est[Rr]esult*/
45 | [Bb]uild[Ll]og.*
46 |
47 | # NUnit
48 | *.VisualState.xml
49 | TestResult.xml
50 | nunit-*.xml
51 |
52 | # Build Results of an ATL Project
53 | [Dd]ebugPS/
54 | [Rr]eleasePS/
55 | dlldata.c
56 |
57 | # Benchmark Results
58 | BenchmarkDotNet.Artifacts/
59 |
60 | # .NET Core
61 | project.lock.json
62 | project.fragment.lock.json
63 | artifacts/
64 |
65 | # ASP.NET Scaffolding
66 | ScaffoldingReadMe.txt
67 |
68 | # StyleCop
69 | StyleCopReport.xml
70 |
71 | # Files built by Visual Studio
72 | *_i.c
73 | *_p.c
74 | *_h.h
75 | *.ilk
76 | *.meta
77 | *.obj
78 | *.iobj
79 | *.pch
80 | *.pdb
81 | *.ipdb
82 | *.pgc
83 | *.pgd
84 | *.rsp
85 | *.sbr
86 | *.tlb
87 | *.tli
88 | *.tlh
89 | *.tmp
90 | *.tmp_proj
91 | *_wpftmp.csproj
92 | *.log
93 | *.tlog
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298 | *.vbp
299 |
300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301 | *.dsw
302 | *.dsp
303 |
304 | # Visual Studio 6 technical files
305 | *.ncb
306 | *.aps
307 |
308 | # Visual Studio LightSwitch build output
309 | **/*.HTMLClient/GeneratedArtifacts
310 | **/*.DesktopClient/GeneratedArtifacts
311 | **/*.DesktopClient/ModelManifest.xml
312 | **/*.Server/GeneratedArtifacts
313 | **/*.Server/ModelManifest.xml
314 | _Pvt_Extensions
315 |
316 | # Paket dependency manager
317 | .paket/paket.exe
318 | paket-files/
319 |
320 | # FAKE - F# Make
321 | .fake/
322 |
323 | # CodeRush personal settings
324 | .cr/personal
325 |
326 | # Python Tools for Visual Studio (PTVS)
327 | __pycache__/
328 | *.pyc
329 |
330 | # Cake - Uncomment if you are using it
331 | # tools/**
332 | # !tools/packages.config
333 |
334 | # Tabs Studio
335 | *.tss
336 |
337 | # Telerik's JustMock configuration file
338 | *.jmconfig
339 |
340 | # BizTalk build output
341 | *.btp.cs
342 | *.btm.cs
343 | *.odx.cs
344 | *.xsd.cs
345 |
346 | # OpenCover UI analysis results
347 | OpenCover/
348 |
349 | # Azure Stream Analytics local run output
350 | ASALocalRun/
351 |
352 | # MSBuild Binary and Structured Log
353 | *.binlog
354 |
355 | # NVidia Nsight GPU debugger configuration file
356 | *.nvuser
357 |
358 | # MFractors (Xamarin productivity tool) working folder
359 | .mfractor/
360 |
361 | # Local History for Visual Studio
362 | .localhistory/
363 |
364 | # Visual Studio History (VSHistory) files
365 | .vshistory/
366 |
367 | # BeatPulse healthcheck temp database
368 | healthchecksdb
369 |
370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
371 | MigrationBackup/
372 |
373 | # Ionide (cross platform F# VS Code tools) working folder
374 | .ionide/
375 |
376 | # Fody - auto-generated XML schema
377 | FodyWeavers.xsd
378 |
379 | # VS Code files for those working on multiple tools
380 | .vscode/*
381 | !.vscode/settings.json
382 | !.vscode/tasks.json
383 | !.vscode/launch.json
384 | !.vscode/extensions.json
385 | *.code-workspace
386 |
387 | # Local History for Visual Studio Code
388 | .history/
389 |
390 | # Windows Installer files from build outputs
391 | *.cab
392 | *.msi
393 | *.msix
394 | *.msm
395 | *.msp
396 |
397 | # JetBrains Rider
398 | *.sln.iml
399 |
400 | # Custom
401 | .venv/
402 | *.csv
403 | *.zip
404 | .env
405 | .deploy.env
406 | .azure
407 | *.local.*
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "cwd": "${workspaceFolder}/src",
9 | "name": "Python: FastAPI",
10 | "type": "python",
11 | "request": "launch",
12 | "module": "uvicorn",
13 | "args": [
14 | "main:api",
15 | "--reload"
16 | ],
17 | "jinja": true,
18 | "justMyCode": true
19 | }
20 | ]
21 | }
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## [project-title] Changelog
2 |
3 |
4 | # x.y.z (yyyy-mm-dd)
5 |
6 | *Features*
7 | * ...
8 |
9 | *Bug Fixes*
10 | * ...
11 |
12 | *Breaking Changes*
13 | * ...
14 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to [project-title]
2 |
3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
6 |
7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
9 | provided by the bot. You will only need to do this once across all repos using our CLA.
10 |
11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
14 |
15 | - [Code of Conduct](#coc)
16 | - [Issues and Bugs](#issue)
17 | - [Feature Requests](#feature)
18 | - [Submission Guidelines](#submit)
19 |
20 | ## Code of Conduct
21 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
22 |
23 | ## Found an Issue?
24 | If you find a bug in the source code or a mistake in the documentation, you can help us by
25 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can
26 | [submit a Pull Request](#submit-pr) with a fix.
27 |
28 | ## Want a Feature?
29 | You can *request* a new feature by [submitting an issue](#submit-issue) to the GitHub
30 | Repository. If you would like to *implement* a new feature, please submit an issue with
31 | a proposal for your work first, to be sure that we can use it.
32 |
33 | * **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr).
34 |
35 | ## Submission Guidelines
36 |
37 | ### Submitting an Issue
38 | Before you submit an issue, search the archive, maybe your question was already answered.
39 |
40 | If your issue appears to be a bug, and hasn't been reported, open a new issue.
41 | Help us to maximize the effort we can spend fixing issues and adding new
42 | features, by not reporting duplicate issues. Providing the following information will increase the
43 | chances of your issue being dealt with quickly:
44 |
45 | * **Overview of the Issue** - if an error is being thrown a non-minified stack trace helps
46 | * **Version** - what version is affected (e.g. 0.1.2)
47 | * **Motivation for or Use Case** - explain what you are trying to do and why the current behavior is a bug for you
48 | * **Browsers and Operating System** - is this a problem with all browsers?
49 | * **Reproduce the Error** - provide a live example or an unambiguous set of steps
50 | * **Related Issues** - has a similar issue been reported before?
51 | * **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
52 | causing the problem (line of code or commit)
53 |
54 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new.
55 |
56 | ### Submitting a Pull Request (PR)
57 | Before you submit your Pull Request (PR) consider the following guidelines:
58 |
59 | * Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR
60 | that relates to your submission. You don't want to duplicate effort.
61 |
62 | * Make your changes in a new git fork:
63 |
64 | * Commit your changes using a descriptive commit message
65 | * Push your fork to GitHub:
66 | * In GitHub, create a pull request
67 | * If we suggest changes then:
68 | * Make the required updates.
69 | * Rebase your fork and force push to your GitHub repository (this will update your Pull Request):
70 |
71 | ```shell
72 | git rebase master -i
73 | git push -f
74 | ```
75 |
76 | That's it! Thank you for your contribution!
77 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Azure SQL DB Vector - KMeans Compute Node
2 |
3 | Perform Approximate Nearest Neighbor (ANN) search on a vector column in Azure SQL DB using KMeans clustering.
4 |
5 | As KMeans clustering is a compute-intensive operation, this project uses the scikit-learn library to perform the clustering and stores the results in a SQL DB table. The results are then used to perform ANN search on the vector column.
6 |
7 | To make the integration with SQL DB seamless, this project uses Azure Container Apps and exposes the KMeans clustering as a REST API. The entire repository can be deployed to Azure using the [free services](https://azure.microsoft.com/en-us/pricing/free-services):
8 |
9 | 
10 |
11 | Vector data is stored in Azure SQL with no additional dependencies as shown in this repository: https://github.com/Azure-Samples/azure-sql-db-openai. The same dataset is used also in this project.
12 |
13 | Azure SQL database can be used to easily and quickly perform vector similarity search. There are two options for this: a *native* option and a *classic* option.
14 |
15 | ## Native or Classic?
16 |
17 | The **native option** is to use the new Vector Functions, recently introduced in Azure SQL database. Vector Functions are a set of functions that can be used to perform vector operations directly in the database.
18 |
19 | > [!NOTE]
20 | > Vector Functions are in Public Preview. Learn the details about vectors in Azure SQL here: https://aka.ms/azure-sql-vector-public-preview
21 |
22 | 
23 |
24 | The **classic option** is to use classic T-SQL to perform vector operations, with the support of columnstore indexes to get good performance.
25 |
26 | > [!IMPORTANT]
27 | > This branch (the `main` branch) uses the native vector support in Azure SQL. If you want to use the classic T-SQL, switch to the `classic` branch.
28 |
29 | If you want to experiment locally, the project [can be run locally](#run-the-project-locally) using [Dev Container](https://code.visualstudio.com/docs/devcontainers/containers) and Docker.
30 |
31 | ## Table of Contents
32 |
33 | - [Vector Search Optimization](#vector-search-optimization-via-voronoi-cells-and-inverted-file-index-aka-cell-probing)
34 | - [Architecture](#architecture)
35 | - [Run the project locally](#run-the-project-locally)
36 | - [Deploy the project to Azure](#deploy-the-project-to-azure)
37 | - [Use the REST API](#rest-api)
38 | - [Search for similar vectors](#search-for-similar-vectors)
39 | - [Performances](#performances)
40 | - [Adding a new vector](#adding-a-new-vector)
41 |
42 | ## Vector Search Optimization via Voronoi Cells and Inverted File Index (aka "Cell-Probing")
43 |
44 | Given a vector, finding the most similar vector among all those stored in a database is a common problem in many applications. The easiest approach to solve this problem is to use a brute force approach, which is to compute the distance between the query vector and all the vectors stored in the database. This is a good approach when the number of vectors is not extremely big, and dimensionality of vectors is not very high, as it guarantees *perfect [recall](https://en.wikipedia.org/wiki/Precision_and_recall)*, meaning that all relevant items that should be returned are actually returned.
45 |
46 | Unfortunately this approach is not scalable as the number of vectors stored in the database increases, so you may want to exchange a perfect recall for much better performance. This is where *approximate nearest neighbor* (ANN) search comes into play. ANN search algorithms are able to return the most similar vectors to the query vector, but they do not guarantee perfect recall. In other words, they may return fewer vectors than all those relevant to the query vector, but they are much faster than the brute force approach.
47 |
48 | To speed up the search, it is possible to split the vectors into groups, making sure to create the groups so that all vectors that are somewhat similar to each other are put in the same group. This is the idea behind *Voronoi cells*.
49 |
50 | 
51 |
52 | The idea is to create a set of *centroids* (i.e. vectors) and then assign each vector to the closest centroid. This way, all vectors that are similar to each other will be assigned to the same centroid. This is a very fast operation, as it is just a matter of computing the distance between the vector and all the centroids and then assigning the vector to the closest centroid. Once all vectors are assigned to a centroid, it is possible to create an *inverted file index* that maps each centroid to the list of vectors assigned to it. This way, when a query vector is given, it is possible to find the closest centroid and then return all the vectors assigned to it. This is much faster than computing the distance between the query vector and all the vectors stored in the database.
53 |
54 | This project uses KMeans clustering to create the centroids and then create the inverted file index. KMeans clustering is a very popular clustering algorithm that is able to create a given number of clusters (i.e. centroids) by iteratively moving the centroids to the center of the vectors assigned to them. The number of clusters is a parameter that can be tuned to trade off recall and performance. The more clusters are created, the better the recall, but the slower the search. The fewer clusters are created, the worse the recall, but the faster the search.
55 |
56 | In this repo the number of clusters is determined by the following code:
57 |
58 | ```python
59 | if (vector_count > 1000000):
60 | clusters = int(math.sqrt(vector_count))
61 | else:
62 | clusters = int(vector_count / 1000) * 2
63 | ```
64 |
65 | ## Architecture
66 |
67 | The architecture of the project is very simple as it is composed of a single container that exposes a REST API to build and rebuild the index and to search for similar vectors. The container is deployed to Azure Container Apps and uses Azure SQL DB to store the vectors and the clusters.
68 |
69 | The idea is that compute-intensive operations, like calculating KMeans, can be offloaded to a dedicated container that is easy to deploy, quick to start and offers serverless scaling for the best performance/cost ratio.
70 |
71 | 
72 |
73 | Once the container is running it is completely independent from the database and can do its work without affecting database performance at all. Even better, if more scalability is needed, data can be partitioned across multiple container instances to achieve parallelism.
74 |
75 | 
76 |
77 | Once the model has been trained, the identified clusters and centroids - and thus the IVF index - are saved back to the SQL DB so that they can be used to perform ANN search on the vector column without the need for the container to remain active. In fact, the container can be stopped completely as SQL DB is completely autonomous now.
78 |
79 | 
80 |
81 | The data is stored back into SQL DB using the following tables:
82 |
83 | - `[$vector].[kmeans]`: stores information about created indexes
84 | - `[$vector].[$$clusters_centroids]`: stores the centroids
85 | - `[$vector].[$$clusters]`: the IVF structure, associating each centroid to the list of vectors assigned to it
86 |
87 | To make the search even easier, a function is also created:
88 |
89 | - `[$vector].[find_similar$$](, , )`: the function to perform ANN search
90 |
91 | The function calculates the dot product which is the same as the cosine similarity if vectors are normalized to 1.
92 |
93 | Also the function:
94 |
95 | - `[$vector].[find_cluster$$]()`: find the cluster of a given vector
96 |
97 | is provided as it is needed to insert new vectors into the IVF index.
98 |
99 | ## Run the project locally
100 |
101 | The project takes advantage of [Dev Container](https://code.visualstudio.com/docs/devcontainers/containers) to run locally. Make sure to have Docker Desktop installed and running on your machine.
102 |
103 | Clone the repository and open it in VS Code. You'll be prompted to reopen the project in a Dev Container. Click on the "Reopen in Container" button. The Dev Container sets up the container needed to run Scikit Learn. Since native vector support is now available only in Azure SQL, you need to have an Azure SQL DB to run this sample. You can use the free tier as mentioned at the beginning of the readme.
104 |
105 | Create an Azure SQL database named `vectordb` and then import the `dbo.wikipedia_articles_embeddings` table following the documentation in the [`sample-data`](./sample-data) folder before proceeding further.
106 |
107 | You can use [Azure Data Studio](https://learn.microsoft.com/en-us/azure-data-studio/download-azure-data-studio) to connect to the SQL DB and run queries against it.
108 |
109 | ### Import sample dataset
110 |
111 | Follow the instructions in the `/sample-data` folder to download the sample dataset. Once the `vector_database_wikipedia_articles_embedded.csv` is available you can import it into the MSSQL database using the script
112 |
113 | - `src/sql/01-import-data.sql`
114 |
115 | and then alter the table to use native vectors using the script
116 |
117 | - `src/sql/02-use-native-vectors.sql`
118 |
119 | ### Run the application
120 |
121 | Create a `.env` file in the `src` folder starting from the `.env.sample`. Add your server name to the connection string and then, from a VS Code terminal, run the following command:
122 |
123 | ```bash
124 | cd src
125 | uvicorn main:api
126 | ```
127 |
128 | and you'll be good to go. The API will be available at http://127.0.0.1:8000.
129 |
130 | You can now run the KMeans clustering algorithm using the commands as described in the [REST API](#rest-api) section.
131 |
132 | ## Deploy the project to Azure
133 |
134 | Deployment to Azure is done using [AZD CLI](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd).
135 |
136 | ### Install AZD CLI
137 |
138 | You need to install it before running and deploying with the Azure Developer CLI.
139 |
140 | On Windows:
141 |
142 | ```powershell
143 | powershell -ex AllSigned -c "Invoke-RestMethod 'https://aka.ms/install-azd.ps1' | Invoke-Expression"
144 | ```
145 |
146 | On Linux/MacOS:
147 |
148 | ```bash
149 | curl -fsSL https://aka.ms/install-azd.sh | bash
150 | ```
151 |
152 | After logging in with the following command, you will be able to use azd cli to quickly provision and deploy the application.
153 |
154 | ### Authenticate with Azure
155 |
156 | Make sure AZD CLI can access Azure resources. You can use the following command to log in to Azure:
157 |
158 | ```bash
159 | azd auth login
160 | ```
161 |
162 | ### Deploy the database
163 |
164 | Follow the steps defined in the [Azure SQL DB OpenAI](https://github.com/Azure-Samples/azure-sql-db-openai?tab=readme-ov-file#download-and-import-the-wikipedia-article-with-vector-embeddings) repository to deploy the database and import the sample dataset. At the end of the process you'll have a table named `dbo.wikipedia_articles_embeddings` with the vector data.
165 |
166 | Then use the following script
167 |
168 | - `src/sql/00-create-user.sql`
169 |
170 | to create a user that will be used by Python to access the database.
171 |
172 | ### Deploy the application
173 |
174 | Initialize the Azure Developer CLI with the following command:
175 |
176 | ```bash
177 | azd init
178 | ```
179 |
180 | and then set the `MSSQL` variable to the connection string pointing to the Azure SQL DB:
181 |
182 | ```bash
183 | azd env set MSSQL 'Server=tcp:<server>.database.windows.net,1433;Initial Catalog=<database>;Persist Security Info=False;User ID=<user>;Password=<password>;'
184 | ```
185 |
186 | finally deploy the application to Azure with the following command:
187 |
188 | ```bash
189 | azd up
190 | ```
191 |
192 | After a few minutes the container will be deployed to Azure Container Apps and will be ready to accept requests.
193 |
194 | ## REST API
195 |
196 | The KMeans model from Scikit Learn is executed within a container and exposed as a REST endpoint. The APIs exposed by the container are:
197 |
198 | - Server Status: `GET /`
199 | - Build Index: `POST /kmeans/build`
200 | - Rebuild Index: `POST /kmeans/rebuild`
201 |
202 | Both Build and Rebuild API are asynchronous. The Server Status API can be used to check the status of the build process.
203 |
204 | ### Build Index
205 |
206 | To build an index from scratch, the Build API expects the following payload:
207 |
208 | ```
209 | {
210 | "table": {
211 | "schema": <schema name>,
212 | "name": <table name>
213 | },
214 | "column": {
215 | "id": <id column name>,
216 | "vector": <vector column name>
217 | },
218 | "vector": {
219 | "dimensions": <number of dimensions>
220 | }
221 | }
222 | ```
223 |
224 | Using the aforementioned wikipedia dataset, the payload would be:
225 |
226 | ```http
227 | POST /kmeans/build
228 | {
229 | "table": {
230 | "schema": "dbo",
231 | "name": "wikipedia_articles_embeddings"
232 | },
233 | "column": {
234 | "id": "id",
235 | "vector": "title_vector_ada2"
236 | },
237 | "vector": {
238 | "dimensions": 1536
239 | }
240 | }
241 | ```
242 |
243 | The API would verify that the request is correct and then start the build process asynchronously returning the id assigned to the index being created:
244 |
245 | ```
246 | {
247 | "server": {
248 | "status": {
249 | "current": "initializing",
250 | "last": "idle"
251 | },
252 | "index_id": "1"
253 | },
254 | "version": "0.0.2"
255 | }
256 | ```
257 |
258 | If an index on the same table and vector column already exists, the API will return an error. If you want to force the creation of a new index over the existing one you can use the `force` option:
259 |
260 | ```http
261 | POST /kmeans/build?force=true
262 | ```
263 |
264 | ### Rebuild Index
265 |
266 | If you need to rebuild an existing index, you can use the Rebuild API. The API doesn't need a payload as it will use the existing index definition. Just like the build process, the rebuild process is also asynchronous. The index to be rebuilt is specified via the URL path:
267 |
268 | ```
269 | POST /kmeans/rebuild/<index id>
270 | ```
271 |
272 | for example, to rebuild the index with id 1:
273 |
274 | ```http
275 | POST /kmeans/rebuild/1
276 | ```
277 |
278 | ### Query API Status
279 |
280 | The status of the build process can be checked using the Server Status API:
281 |
282 | ```http
283 | GET /
284 | ```
285 |
286 | and you'll get the current status and the last status reported:
287 |
288 | ```json
289 | {
290 | "server": {
291 | "status": {
292 | "current": "building",
293 | "last": "initializing"
294 | },
295 | "index_id": 1
296 | },
297 | "version": "0.0.2"
298 | }
299 | ```
300 |
301 | Checking the last status is useful to understand if an error occurred during the build process.
302 |
303 | You can also check the index build status by querying the `[$vector].[kmeans]` table.
304 |
305 | ## Search for similar vectors
306 |
307 | Once you have built the index, you can search for similar vectors. Using the sample dataset, you can search for the 10 most similar articles to 'Isaac Asimov' using the `find_similar` function that has been created as part of the index build process. For example:
308 |
309 | ```sql
310 | -- Store the vector representing 'Isaac Asimov' in a variable
311 | declare @v varbinary(8000);
312 | select @v = content_vector from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';
313 |
314 | -- Find the 10 most similar articles to 'Isaac Asimov' based on the content vector
315 | -- searching only in the closest cluster
316 | select * from [$vector].find_similar$wikipedia_articles_embeddings$content_vector(@v, 10, 1, 0.75) order by dot_product desc
317 | ```
318 |
319 | The `find_similar` function takes 4 parameters:
320 |
321 | - the vector to search for
322 | - the number of similar vectors to return
323 | - the number of clusters to search in
324 | - the similarity threshold
325 |
326 | The similarity threshold is used to filter out vectors that are not similar enough to the query vector. The higher the threshold, the more similar the vectors returned will be. The number of clusters to search in trades accuracy for speed: the more clusters are probed, the more likely it is that the true nearest neighbors are found; the fewer clusters are probed, the faster the search will be.
327 |
328 | ## Performances
329 |
330 | As visible in this gif, the performance improvement is quite substantial. The gif shows the execution of the `find_similar` function with different numbers of probed clusters.
331 |
332 | 
333 |
334 | ## Adding a new vector
335 |
336 | To add a new vector to the index, you can use the `find_cluster` function to find the cluster of the new vector and then insert the vector into the corresponding cluster. A full example is provided in the `src/sql/06-add-new-vector.sql` script.
337 |
338 | Adding a new vector to the index can deteriorate the quality of the index as new centroids are not calculated, so it is recommended to rebuild the index after adding a significant number of new vectors, to create new centroids and reassign the vectors to the new centroids.
339 |
340 | ## References
341 |
342 | The Voronoi Cells image is from Wikipedia: https://en.wikipedia.org/wiki/Voronoi_diagram#/media/File:Euclidean_Voronoi_diagram.svg, used under the [CC BY-SA 4.0 DEED](https://creativecommons.org/licenses/by-sa/4.0/) license.
343 |
--------------------------------------------------------------------------------
/_assets/azure-sql-cosine-similarity-vector-type.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/azure-sql-cosine-similarity-vector-type.gif
--------------------------------------------------------------------------------
/_assets/sql-aca-free-tiers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/sql-aca-free-tiers.png
--------------------------------------------------------------------------------
/_assets/sql-kmeans-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/sql-kmeans-1.png
--------------------------------------------------------------------------------
/_assets/sql-kmeans-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/sql-kmeans-2.png
--------------------------------------------------------------------------------
/_assets/sql-kmeans-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/sql-kmeans-3.png
--------------------------------------------------------------------------------
/_assets/sql-kmeans-performance.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/sql-kmeans-performance.gif
--------------------------------------------------------------------------------
/_assets/wikipedia-voronoi_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-sql-db-vectors-kmeans/ca34cc8893562f3d1e22dad96f90f6c053a0377e/_assets/wikipedia-voronoi_diagram.png
--------------------------------------------------------------------------------
/azure.yaml:
--------------------------------------------------------------------------------
1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
2 |
3 | name: azure-sql-db-vectors-kmeans
4 | metadata:
5 | template: azure-sql-db-vectors-kmeans@0.0.1
6 | services:
7 | kmeans:
8 | project: ./src
9 | language: py
10 | host: containerapp
11 |
--------------------------------------------------------------------------------
/infra/core/ai/cognitiveservices.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 | @description('The custom subdomain name used to access the API. Defaults to the value of the name parameter.')
5 | param customSubDomainName string = name
6 | param deployments array = []
7 | param kind string = 'OpenAI'
8 | param publicNetworkAccess string = 'Enabled'
9 | param sku object = {
10 | name: 'S0'
11 | }
12 |
13 | resource account 'Microsoft.CognitiveServices/accounts@2022-10-01' = {
14 | name: name
15 | location: location
16 | tags: tags
17 | kind: kind
18 | properties: {
19 | customSubDomainName: customSubDomainName
20 | publicNetworkAccess: publicNetworkAccess
21 | }
22 | sku: sku
23 | }
24 |
25 | @batchSize(1) // deploy the entries of `deployments` one at a time rather than in parallel
26 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2022-10-01' = [for deployment in deployments: { // one model deployment per entry of the `deployments` array
27 | parent: account
28 | name: deployment.name
29 | properties: {
30 | model: deployment.model
31 | raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null // optional key: null is passed when the entry does not define it
32 | scaleSettings: deployment.scaleSettings
33 | }
34 | }]
35 |
36 | output endpoint string = account.properties.endpoint
37 | output id string = account.id
38 | output name string = account.name
39 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/cosmos-account.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param connectionStringKey string = 'AZURE-COSMOS-CONNECTION-STRING'
6 | param keyVaultName string
7 |
8 | @allowed([ 'GlobalDocumentDB', 'MongoDB', 'Parse' ])
9 | param kind string
10 |
11 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' = { // Cosmos DB account; the API flavour is chosen by the `kind` parameter
12 | name: name
13 | kind: kind
14 | location: location
15 | tags: tags
16 | properties: {
17 | consistencyPolicy: { defaultConsistencyLevel: 'Session' }
18 | locations: [ // a single region, the same one the account resource is created in
19 | {
20 | locationName: location
21 | failoverPriority: 0
22 | isZoneRedundant: false
23 | }
24 | ]
25 | databaseAccountOfferType: 'Standard'
26 | enableAutomaticFailover: false
27 | enableMultipleWriteLocations: false
28 | apiProperties: (kind == 'MongoDB') ? { serverVersion: '4.0' } : {} // a server version is only supplied for MongoDB accounts; other kinds get an empty object
29 | capabilities: [ { name: 'EnableServerless' } ] // the account is always created with the serverless capability
30 | }
31 | }
32 |
33 | resource cosmosConnectionString 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = {
34 | parent: keyVault
35 | name: connectionStringKey
36 | properties: {
37 | value: cosmos.listConnectionStrings().connectionStrings[0].connectionString
38 | }
39 | }
40 |
41 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = {
42 | name: keyVaultName
43 | }
44 |
45 | output connectionStringKey string = connectionStringKey
46 | output endpoint string = cosmos.properties.documentEndpoint
47 | output id string = cosmos.id
48 | output name string = cosmos.name
49 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/mongo/cosmos-mongo-account.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param keyVaultName string
6 | param connectionStringKey string = 'AZURE-COSMOS-CONNECTION-STRING'
7 |
8 | module cosmos '../../cosmos/cosmos-account.bicep' = {
9 | name: 'cosmos-account'
10 | params: {
11 | name: name
12 | location: location
13 | connectionStringKey: connectionStringKey
14 | keyVaultName: keyVaultName
15 | kind: 'MongoDB'
16 | tags: tags
17 | }
18 | }
19 |
20 | output connectionStringKey string = cosmos.outputs.connectionStringKey
21 | output endpoint string = cosmos.outputs.endpoint
22 | output id string = cosmos.outputs.id
23 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/mongo/cosmos-mongo-db.bicep:
--------------------------------------------------------------------------------
1 | param accountName string
2 | param databaseName string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | param collections array = []
7 | param connectionStringKey string = 'AZURE-COSMOS-CONNECTION-STRING'
8 | param keyVaultName string
9 |
10 | module cosmos 'cosmos-mongo-account.bicep' = {
11 | name: 'cosmos-mongo-account'
12 | params: {
13 | name: accountName
14 | location: location
15 | keyVaultName: keyVaultName
16 | tags: tags
17 | connectionStringKey: connectionStringKey
18 | }
19 | }
20 |
21 | resource database 'Microsoft.DocumentDB/databaseAccounts/mongodbDatabases@2022-08-15' = {
22 | name: '${accountName}/${databaseName}'
23 | tags: tags
24 | properties: {
25 | resource: { id: databaseName }
26 | }
27 |
28 | resource list 'collections' = [for collection in collections: {
29 | name: collection.name
30 | properties: {
31 | resource: {
32 | id: collection.id
33 | shardKey: { _id: collection.shardKey }
34 | indexes: [ { key: { keys: [ collection.indexKey ] } } ]
35 | }
36 | }
37 | }]
38 |
39 | dependsOn: [
40 | cosmos
41 | ]
42 | }
43 |
44 | output connectionStringKey string = connectionStringKey
45 | output databaseName string = databaseName
46 | output endpoint string = cosmos.outputs.endpoint
47 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-account.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param keyVaultName string
6 |
7 | module cosmos '../../cosmos/cosmos-account.bicep' = {
8 | name: 'cosmos-account'
9 | params: {
10 | name: name
11 | location: location
12 | tags: tags
13 | keyVaultName: keyVaultName
14 | kind: 'GlobalDocumentDB'
15 | }
16 | }
17 |
18 | output connectionStringKey string = cosmos.outputs.connectionStringKey
19 | output endpoint string = cosmos.outputs.endpoint
20 | output id string = cosmos.outputs.id
21 | output name string = cosmos.outputs.name
22 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-db.bicep:
--------------------------------------------------------------------------------
1 | param accountName string
2 | param databaseName string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | param containers array = []
7 | param keyVaultName string
8 | param principalIds array = []
9 |
10 | module cosmos 'cosmos-sql-account.bicep' = {
11 | name: 'cosmos-sql-account'
12 | params: {
13 | name: accountName
14 | location: location
15 | tags: tags
16 | keyVaultName: keyVaultName
17 | }
18 | }
19 |
20 | resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2022-05-15' = {
21 | name: '${accountName}/${databaseName}'
22 | properties: {
23 | resource: { id: databaseName }
24 | }
25 |
26 | resource list 'containers' = [for container in containers: {
27 | name: container.name
28 | properties: {
29 | resource: {
30 | id: container.id
31 | partitionKey: { paths: [ container.partitionKey ] }
32 | }
33 | options: {}
34 | }
35 | }]
36 |
37 | dependsOn: [
38 | cosmos
39 | ]
40 | }
41 |
42 | module roleDefintion 'cosmos-sql-role-def.bicep' = {
43 | name: 'cosmos-sql-role-definition'
44 | params: {
45 | accountName: accountName
46 | }
47 | dependsOn: [
48 | cosmos
49 | database
50 | ]
51 | }
52 |
53 | // We need batchSize(1) here because sql role assignments have to be done sequentially
54 | @batchSize(1)
55 | module userRole 'cosmos-sql-role-assign.bicep' = [for principalId in principalIds: if (!empty(principalId)) {
56 | name: 'cosmos-sql-user-role-${uniqueString(principalId)}'
57 | params: {
58 | accountName: accountName
59 | roleDefinitionId: roleDefintion.outputs.id
60 | principalId: principalId
61 | }
62 | dependsOn: [
63 | cosmos
64 | database
65 | ]
66 | }]
67 |
68 | output accountId string = cosmos.outputs.id
69 | output accountName string = cosmos.outputs.name
70 | output connectionStringKey string = cosmos.outputs.connectionStringKey
71 | output databaseName string = databaseName
72 | output endpoint string = cosmos.outputs.endpoint
73 | output roleDefinitionId string = roleDefintion.outputs.id
74 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-role-assign.bicep:
--------------------------------------------------------------------------------
1 | param accountName string
2 |
3 | param roleDefinitionId string
4 | param principalId string = ''
5 |
6 | resource role 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2022-05-15' = {
7 | parent: cosmos
8 | name: guid(roleDefinitionId, principalId, cosmos.id)
9 | properties: {
10 | principalId: principalId
11 | roleDefinitionId: roleDefinitionId
12 | scope: cosmos.id
13 | }
14 | }
15 |
16 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = {
17 | name: accountName
18 | }
19 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-role-def.bicep:
--------------------------------------------------------------------------------
1 | param accountName string
2 |
3 | resource roleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2022-08-15' = {
4 | parent: cosmos
5 | name: guid(cosmos.id, accountName, 'sql-role')
6 | properties: {
7 | assignableScopes: [
8 | cosmos.id
9 | ]
10 | permissions: [
11 | {
12 | dataActions: [
13 | 'Microsoft.DocumentDB/databaseAccounts/readMetadata'
14 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/items/*'
15 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/*'
16 | ]
17 | notDataActions: []
18 | }
19 | ]
20 | roleName: 'Reader Writer'
21 | type: 'CustomRole'
22 | }
23 | }
24 |
25 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = {
26 | name: accountName
27 | }
28 |
29 | output id string = roleDefinition.id
30 |
--------------------------------------------------------------------------------
/infra/core/database/postgresql/flexibleserver.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param sku object
6 | param storage object
7 | param administratorLogin string
8 | @secure()
9 | param administratorLoginPassword string
10 | param databaseNames array = []
11 | param allowAzureIPsFirewall bool = false
12 | param allowAllIPsFirewall bool = false
13 | param allowedSingleIPs array = []
14 |
15 | // PostgreSQL version
16 | param version string
17 |
18 | // Latest official version 2022-12-01 does not have Bicep types available
19 | resource postgresServer 'Microsoft.DBforPostgreSQL/flexibleServers@2022-12-01' = { // PostgreSQL flexible server plus its child databases and firewall rules
20 | location: location
21 | tags: tags
22 | name: name
23 | sku: sku
24 | properties: {
25 | version: version
26 | administratorLogin: administratorLogin
27 | administratorLoginPassword: administratorLoginPassword
28 | storage: storage
29 | highAvailability: {
30 | mode: 'Disabled'
31 | }
32 | }
33 |
34 | resource database 'databases' = [for name in databaseNames: { // one child database per entry of `databaseNames`
35 | name: name
36 | }]
37 |
38 | resource firewall_all 'firewallRules' = if (allowAllIPsFirewall) { // opt-in: opens the server to the whole IPv4 range
39 | name: 'allow-all-IPs'
40 | properties: {
41 | startIpAddress: '0.0.0.0'
42 | endIpAddress: '255.255.255.255'
43 | }
44 | }
45 |
46 | resource firewall_azure 'firewallRules' = if (allowAzureIPsFirewall) { // opt-in; NOTE(review): 0.0.0.0-0.0.0.0 is presumably the "allow Azure-hosted clients" convention - confirm against the flexible server docs
47 | name: 'allow-all-azure-internal-IPs'
48 | properties: {
49 | startIpAddress: '0.0.0.0'
50 | endIpAddress: '0.0.0.0'
51 | }
52 | }
53 |
54 | resource firewall_single 'firewallRules' = [for ip in allowedSingleIPs: { // one single-address rule per entry of `allowedSingleIPs`
55 | name: 'allow-single-${replace(ip, '.', '')}' // rule name is the address with its dots removed
56 | properties: {
57 | startIpAddress: ip
58 | endIpAddress: ip
59 | }
60 | }]
61 |
62 | }
63 |
64 | output POSTGRES_DOMAIN_NAME string = postgresServer.properties.fullyQualifiedDomainName
65 |
--------------------------------------------------------------------------------
/infra/core/database/sqlserver/sqlserver.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param appUser string = 'appUser'
6 | param databaseName string
7 | param keyVaultName string
8 | param sqlAdmin string = 'sqlAdmin'
9 | param connectionStringKey string = 'AZURE-SQL-CONNECTION-STRING'
10 |
11 | @secure()
12 | param sqlAdminPassword string
13 | @secure()
14 | param appUserPassword string
15 |
16 | resource sqlServer 'Microsoft.Sql/servers@2022-05-01-preview' = { // logical SQL server with one child database and one firewall rule
17 | name: name
18 | location: location
19 | tags: tags
20 | properties: {
21 | version: '12.0'
22 | minimalTlsVersion: '1.2'
23 | publicNetworkAccess: 'Enabled'
24 | administratorLogin: sqlAdmin
25 | administratorLoginPassword: sqlAdminPassword
26 | }
27 |
28 | resource database 'databases' = { // the application database, created with default settings
29 | name: databaseName
30 | location: location
31 | }
32 |
33 | resource firewall 'firewallRules' = {
34 | name: 'Azure Services' // NOTE(review): the name is misleading - the range below admits almost every IPv4 address, not only Azure services
35 | properties: {
36 | // Allow all clients
37 | // Note: range [0.0.0.0-0.0.0.0] means "allow all Azure-hosted clients only".
38 | // This is not sufficient, because we also want to allow direct access from developer machine, for debugging purposes.
39 | startIpAddress: '0.0.0.1'
40 | endIpAddress: '255.255.255.254'
41 | }
42 | }
43 | }
44 |
45 | resource sqlDeploymentScript 'Microsoft.Resources/deploymentScripts@2020-10-01' = { // one-off Azure CLI script that creates the application user inside the database
46 | name: '${name}-deployment-script'
47 | location: location
48 | kind: 'AzureCLI'
49 | properties: {
50 | azCliVersion: '2.37.0'
51 | retentionInterval: 'PT1H' // Retain the script resource for 1 hour after it ends running
52 | timeout: 'PT5M' // Five minutes
53 | cleanupPreference: 'OnSuccess'
54 | environmentVariables: [ // values the script reads as shell variables; secrets are passed via secureValue
55 | {
56 | name: 'APPUSERNAME'
57 | value: appUser
58 | }
59 | {
60 | name: 'APPUSERPASSWORD'
61 | secureValue: appUserPassword
62 | }
63 | {
64 | name: 'DBNAME'
65 | value: databaseName
66 | }
67 | {
68 | name: 'DBSERVER'
69 | value: sqlServer.properties.fullyQualifiedDomainName
70 | }
71 | {
72 | name: 'SQLCMDPASSWORD'
73 | secureValue: sqlAdminPassword
74 | }
75 | {
76 | name: 'SQLADMIN'
77 | value: sqlAdmin
78 | }
79 | ]
80 |
81 | scriptContent: ''' // multi-line string: the ${...} tokens below are expanded by the shell, not by Bicep
82 | wget https://github.com/microsoft/go-sqlcmd/releases/download/v0.8.1/sqlcmd-v0.8.1-linux-x64.tar.bz2
83 | tar x -f sqlcmd-v0.8.1-linux-x64.tar.bz2 -C .
84 |
85 | cat <<SCRIPT_END > ./initDb.sql
86 | drop user if exists ${APPUSERNAME}
87 | go
88 | create user ${APPUSERNAME} with password = '${APPUSERPASSWORD}'
89 | go
90 | alter role db_owner add member ${APPUSERNAME}
91 | go
92 | SCRIPT_END
93 |
94 | ./sqlcmd -S ${DBSERVER} -d ${DBNAME} -U ${SQLADMIN} -i ./initDb.sql
95 | '''
96 | }
97 | }
98 |
99 | resource sqlAdminPasswordSecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = {
100 | parent: keyVault
101 | name: 'sqlAdminPassword'
102 | properties: {
103 | value: sqlAdminPassword
104 | }
105 | }
106 |
107 | resource appUserPasswordSecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = {
108 | parent: keyVault
109 | name: 'appUserPassword'
110 | properties: {
111 | value: appUserPassword
112 | }
113 | }
114 |
115 | resource sqlAzureConnectionStringSercret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = {
116 | parent: keyVault
117 | name: connectionStringKey
118 | properties: {
119 | value: '${connectionString}; Password=${appUserPassword}'
120 | }
121 | }
122 |
123 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = {
124 | name: keyVaultName
125 | }
126 |
127 | var connectionString = 'Server=${sqlServer.properties.fullyQualifiedDomainName}; Database=${sqlServer::database.name}; User=${appUser}'
128 | output connectionStringKey string = connectionStringKey
129 | output databaseName string = sqlServer::database.name
130 |
--------------------------------------------------------------------------------
/infra/core/gateway/apim-api-policy.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | {0}
9 |
10 |
11 | PUT
12 | GET
13 | POST
14 | DELETE
15 | PATCH
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | Call to the @(context.Api.Name)
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | Failed to process the @(context.Api.Name)
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 | We're Sorry. An unexpected error has occurred. If this continues please contact Tech Support.
90 |
91 |
92 |
--------------------------------------------------------------------------------
/infra/core/gateway/apim.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | @description('The email address of the owner of the service')
6 | @minLength(1)
7 | param publisherEmail string = 'noreply@microsoft.com'
8 |
9 | @description('The name of the owner of the service')
10 | @minLength(1)
11 | param publisherName string = 'n/a'
12 |
13 | @description('The pricing tier of this API Management service')
14 | @allowed([
15 | 'Consumption'
16 | 'Developer'
17 | 'Standard'
18 | 'Premium'
19 | ])
20 | param sku string = 'Consumption'
21 |
22 | @description('The instance size of this API Management service.')
23 | @allowed([ 0, 1, 2 ])
24 | param skuCount int = 0
25 |
26 | @description('Azure Application Insights Name')
27 | param applicationInsightsName string
28 |
29 | resource apimService 'Microsoft.ApiManagement/service@2021-08-01' = { // API Management instance with legacy ciphers and protocols switched off where the SKU allows it
30 | name: name
31 | location: location
32 | tags: union(tags, { 'azd-service-name': name }) // caller tags plus an 'azd-service-name' tag set to the service name
33 | sku: {
34 | name: sku
35 | capacity: (sku == 'Consumption') ? 0 : ((sku == 'Developer') ? 1 : skuCount) // Consumption is forced to 0 and Developer to 1; only the other tiers honour skuCount
36 | }
37 | properties: {
38 | publisherEmail: publisherEmail
39 | publisherName: publisherName
40 | // Custom properties are not supported for Consumption SKU
41 | customProperties: sku == 'Consumption' ? {} : {
42 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA': 'false'
43 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA': 'false'
44 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_RSA_WITH_AES_128_GCM_SHA256': 'false'
45 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_RSA_WITH_AES_256_CBC_SHA256': 'false'
46 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_RSA_WITH_AES_128_CBC_SHA256': 'false'
47 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_RSA_WITH_AES_256_CBC_SHA': 'false'
48 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TLS_RSA_WITH_AES_128_CBC_SHA': 'false'
49 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Ciphers.TripleDes168': 'false'
50 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Protocols.Tls10': 'false'
51 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Protocols.Tls11': 'false'
52 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Protocols.Ssl30': 'false'
53 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Backend.Protocols.Tls10': 'false'
54 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Backend.Protocols.Tls11': 'false'
55 | 'Microsoft.WindowsAzure.ApiManagement.Gateway.Security.Backend.Protocols.Ssl30': 'false'
56 | }
57 | }
58 | }
59 |
60 | resource apimLogger 'Microsoft.ApiManagement/service/loggers@2021-12-01-preview' = if (!empty(applicationInsightsName)) { // created only when an Application Insights name is supplied
61 | name: 'app-insights-logger'
62 | parent: apimService
63 | properties: {
64 | credentials: {
65 | instrumentationKey: applicationInsights.properties.InstrumentationKey // read from the existing Application Insights component referenced in this file
66 | }
67 | description: 'Logger to Azure Application Insights'
68 | isBuffered: false
69 | loggerType: 'applicationInsights'
70 | resourceId: applicationInsights.id
71 | }
72 | }
73 |
74 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { // reference to an existing component; nothing is created here
75 | name: applicationInsightsName
76 | }
77 |
78 | output apimServiceName string = apimService.name // resource name of the apimService resource
79 |
--------------------------------------------------------------------------------
/infra/core/host/aks-agent-pool.bicep:
--------------------------------------------------------------------------------
1 | param clusterName string // name of the existing AKS managed cluster that receives the pool
2 |
3 | @description('The agent pool name')
4 | param name string
5 |
6 | @description('The agent pool configuration')
7 | param config object
8 |
9 | resource aksCluster 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' existing = { // lookup only; the cluster is not created here
10 |   name: clusterName
11 | }
12 |
13 | resource nodePool 'Microsoft.ContainerService/managedClusters/agentPools@2023-01-02-preview' = {
14 |   name: name
15 |   parent: aksCluster
16 |   properties: config // passed through unchanged as the agent pool properties
17 | }
18 |
--------------------------------------------------------------------------------
/infra/core/host/aks-managed-cluster.bicep:
--------------------------------------------------------------------------------
1 | @description('The name for the AKS managed cluster')
2 | param name string
3 |
4 | @description('The name of the resource group for the managed resources of the AKS cluster')
5 | param nodeResourceGroupName string = '' // when empty, the cluster resource falls back to 'rg-mc-${name}'
6 |
7 | @description('The Azure region/location for the AKS resources')
8 | param location string = resourceGroup().location
9 |
10 | @description('Custom tags to apply to the AKS resources')
11 | param tags object = {}
12 |
13 | @description('Kubernetes Version')
14 | param kubernetesVersion string = '1.25.5' // NOTE(review): pinned patch version; confirm it is still offered in the target region
15 |
16 | @description('Whether RBAC is enabled for local accounts')
17 | param enableRbac bool = true // forwarded to the cluster's enableRBAC property
18 |
19 | // Add-ons
20 | @description('Whether web app routing (preview) add-on is enabled')
21 | param webAppRoutingAddon bool = true
22 |
23 | // AAD Integration
24 | @description('Enable Azure Active Directory integration')
25 | param enableAad bool = false // when false, aadProfile is set to null on the cluster
26 |
27 | @description('Enable RBAC using AAD')
28 | param enableAzureRbac bool = false // only read inside aadProfile, i.e. when enableAad is true
29 |
30 | @description('The Tenant ID associated to the Azure Active Directory')
31 | param aadTenantId string = '' // only read inside aadProfile, i.e. when enableAad is true
32 |
33 | @description('The load balancer SKU to use for ingress into the AKS cluster')
34 | @allowed([ 'basic', 'standard' ])
35 | param loadBalancerSku string = 'standard'
36 |
37 | @description('Network plugin used for building the Kubernetes network.')
38 | @allowed([ 'azure', 'kubenet', 'none' ])
39 | param networkPlugin string = 'azure'
40 |
41 | @description('Network policy used for building the Kubernetes network.')
42 | @allowed([ 'azure', 'calico' ])
43 | param networkPolicy string = 'azure'
44 |
45 | @description('If set to true, getting static credentials will be disabled for this cluster.')
46 | param disableLocalAccounts bool = false // the cluster combines this with enableAad, so it has no effect without AAD integration
47 |
48 | @description('The managed cluster SKU.')
49 | @allowed([ 'Free', 'Paid', 'Standard' ])
50 | param sku string = 'Free' // used as the SKU tier; the SKU name is fixed to 'Base' on the cluster resource
51 |
52 | @description('Configuration of AKS add-ons')
53 | param addOns object = {} // assigned as-is to the cluster's addonProfiles
54 |
55 | @description('The log analytics workspace id used for logging & monitoring')
56 | param workspaceId string = '' // diagnostic settings are deployed only when this is non-empty
57 |
58 | @description('The node pool configuration for the System agent pool')
59 | param systemPoolConfig object // becomes the single entry of agentPoolProfiles
60 |
61 | @description('The DNS prefix to associate with the AKS cluster')
62 | param dnsPrefix string = '' // when empty, '${name}-dns' is used
63 |
64 | resource aks 'Microsoft.ContainerService/managedClusters@2023-02-01' = {
65 |   name: name
66 |   location: location
67 |   tags: tags
68 |   sku: {
69 |     name: 'Base'
70 |     tier: sku
71 |   }
72 |   identity: {
73 |     type: 'SystemAssigned'
74 |   }
75 |   properties: {
76 |     kubernetesVersion: kubernetesVersion
77 |     dnsPrefix: !empty(dnsPrefix) ? dnsPrefix : '${name}-dns' // derive a prefix from the cluster name when none is given
78 |     nodeResourceGroup: empty(nodeResourceGroupName) ? 'rg-mc-${name}' : nodeResourceGroupName
79 |     enableRBAC: enableRbac
80 |     disableLocalAccounts: enableAad && disableLocalAccounts // local accounts are only disabled when AAD integration is on
81 |     aadProfile: !enableAad ? null : {
82 |       managed: true
83 |       tenantID: aadTenantId
84 |       enableAzureRBAC: enableAzureRbac
85 |     }
86 |     agentPoolProfiles: [
87 |       systemPoolConfig
88 |     ]
89 |     networkProfile: {
90 |       networkPlugin: networkPlugin
91 |       networkPolicy: networkPolicy
92 |       loadBalancerSku: loadBalancerSku
93 |     }
94 |     addonProfiles: addOns
95 |     ingressProfile: {
96 |       webAppRouting: {
97 |         enabled: webAppRoutingAddon
98 |       }
99 |     }
100 |   }
101 | }
102 |
103 | var aksDiagCategories = [ // log categories enabled on the diagnostic setting below
104 |   'cluster-autoscaler'
105 |   'kube-controller-manager'
106 |   'kube-audit-admin'
107 |   'guard'
108 | ]
109 |
110 | // Diagnostic settings are declared inline rather than in a shared module.
111 | // TODO: move them into their own module once https://github.com/Azure/bicep/issues/622 is resolved
112 | // (a `resource` scope cannot be passed in, and resource types cannot use string interpolation).
113 | resource diagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = if (!empty(workspaceId)) {
114 |   scope: aks
115 |   name: 'aks-diagnostics'
116 |   properties: {
117 |     workspaceId: workspaceId
118 |     metrics: [
119 |       {
120 |         enabled: true
121 |         category: 'AllMetrics'
122 |       }
123 |     ]
124 |     logs: [for diagCategory in aksDiagCategories: {
125 |       enabled: true
126 |       category: diagCategory
127 |     }]
128 |   }
129 | }
130 |
131 | @description('The resource name of the AKS cluster')
132 | output clusterName string = aks.name
133 |
134 | @description('The AKS cluster identity')
135 | output clusterIdentity object = { // all three values are read from the cluster's kubelet identity profile
136 |   resourceId: aks.properties.identityProfile.kubeletidentity.resourceId
137 |   objectId: aks.properties.identityProfile.kubeletidentity.objectId
138 |   clientId: aks.properties.identityProfile.kubeletidentity.clientId
139 | }
140 |
--------------------------------------------------------------------------------
/infra/core/host/aks.bicep:
--------------------------------------------------------------------------------
1 | @description('The name for the AKS managed cluster')
2 | param name string
3 |
4 | @description('The name for the Azure container registry (ACR)')
5 | param containerRegistryName string
6 |
7 | @description('The name of the connected log analytics workspace')
8 | param logAnalyticsName string = '' // when empty, no workspace is looked up and an empty workspaceId is passed to the child modules
9 |
10 | @description('The name of the keyvault to grant access')
11 | param keyVaultName string
12 |
13 | @description('The Azure region/location for the AKS resources')
14 | param location string = resourceGroup().location
15 |
16 | @description('Custom tags to apply to the AKS resources')
17 | param tags object = {}
18 |
19 | @description('AKS add-ons configuration')
20 | param addOns object = { // an entry is forwarded to the cluster only when it is non-empty and its `enabled` flag is true
21 | azurePolicy: {
22 | enabled: true
23 | config: {
24 | version: 'v2'
25 | }
26 | }
27 | keyVault: {
28 | enabled: true
29 | config: {
30 | enableSecretRotation: 'true'
31 | rotationPollInterval: '2m'
32 | }
33 | }
34 | openServiceMesh: {
35 | enabled: false
36 | config: {}
37 | }
38 | omsAgent: {
39 | enabled: true
40 | config: {}
41 | }
42 | applicationGateway: {
43 | enabled: false
44 | config: {}
45 | }
46 | }
47 |
48 | @allowed([
49 | 'CostOptimised'
50 | 'Standard'
51 | 'HighSpec'
52 | 'Custom'
53 | ])
54 | @description('The System Pool Preset sizing')
55 | param systemPoolType string = 'CostOptimised' // key into nodePoolPresets; only consulted when systemPoolConfig is empty
56 |
57 | @allowed([
58 | ''
59 | 'CostOptimised'
60 | 'Standard'
61 | 'HighSpec'
62 | 'Custom'
63 | ])
64 | @description('The User Pool Preset sizing')
65 | param agentPoolType string = '' // '' together with an empty agentPoolConfig means no user pool is deployed
66 |
67 | // Configure system / user agent pools
68 | @description('Custom configuration of system node pool')
69 | param systemPoolConfig object = {} // NOTE: nodePoolPresets has no 'Custom' entry, so systemPoolType 'Custom' needs a non-empty value here
70 | @description('Custom configuration of user node pool')
71 | param agentPoolConfig object = {} // NOTE: likewise needed when agentPoolType is 'Custom'
72 |
73 | // omsAgent add-on settings with the workspace id merged in; {} when monitoring is off or no workspace name is given
74 | var omsAgentConfig = (empty(logAnalyticsName) || empty(addOns.omsAgent) || !addOns.omsAgent.enabled) ? {} : union(
75 |   addOns.omsAgent,
76 |   {
77 |     config: {
78 |       logAnalyticsWorkspaceResourceID: logAnalytics.id
79 |     }
80 |   }
81 | )
82 |
83 | var addOnsConfig = union( // maps the enabled entries of `addOns` onto AKS addonProfiles keys; disabled or empty entries are omitted
84 |   (!empty(addOns.azurePolicy) && addOns.azurePolicy.enabled) ? { azurepolicy: addOns.azurePolicy } : {},
85 |   (!empty(addOns.keyVault) && addOns.keyVault.enabled) ? { azureKeyvaultSecretsProvider: addOns.keyVault } : {},
86 |   (!empty(addOns.openServiceMesh) && addOns.openServiceMesh.enabled) ? { openServiceMesh: addOns.openServiceMesh } : {},
87 |   !empty(omsAgentConfig) ? { omsagent: omsAgentConfig } : {}, // omsAgentConfig is {} without a workspace; omit the profile instead of emitting an empty one
88 |   (!empty(addOns.applicationGateway) && addOns.applicationGateway.enabled) ? { ingressApplicationGateway: addOns.applicationGateway } : {}
89 | )
90 |
91 | // Existing Log Analytics workspace; only referenced when a name was supplied
92 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' existing = if (!empty(logAnalyticsName)) {
93 |   name: logAnalyticsName
94 | }
95 |
96 | var systemPoolSpec = empty(systemPoolConfig) ? nodePoolPresets[systemPoolType] : systemPoolConfig // an explicit config wins over the preset
97 |
98 | // Deploy the AKS cluster together with its system node pool
99 | module managedCluster 'aks-managed-cluster.bicep' = {
100 |   name: 'managed-cluster'
101 |   params: {
102 |     name: name
103 |     tags: tags
104 |     location: location
105 |     addOns: addOnsConfig
106 |     workspaceId: empty(logAnalyticsName) ? '' : logAnalytics.id
107 |     systemPoolConfig: union( // fixed name/mode first, then shared defaults, then the chosen spec
108 |       { name: 'npsystem', mode: 'System' },
109 |       nodePoolBase,
110 |       systemPoolSpec
111 |     )
112 |   }
113 | }
114 |
115 | var hasAgentPool = !(empty(agentPoolConfig) && empty(agentPoolType))
116 | var agentPoolSpec = !empty(agentPoolConfig) ? agentPoolConfig : (empty(agentPoolType) ? {} : nodePoolPresets[agentPoolType])
117 |
118 | // Optional user node pool, deployed only when a custom config or a preset was requested
119 | module agentPool 'aks-agent-pool.bicep' = if (hasAgentPool) {
120 |   name: 'aks-node-pool'
121 |   params: {
122 |     name: 'npuserpool' // NOTE(review): differs from the 'npuser' name carried inside config; confirm which one is intended
123 |     clusterName: managedCluster.outputs.clusterName
124 |     config: union({ name: 'npuser', mode: 'User' }, nodePoolBase, agentPoolSpec)
125 |   }
126 | }
127 |
128 | // Azure Container Registry (ACR) deployed next to the cluster
129 | module containerRegistry 'container-registry.bicep' = {
130 |   name: 'container-registry'
131 |   params: {
132 |     name: containerRegistryName
133 |     tags: tags
134 |     location: location
135 |     workspaceId: empty(logAnalyticsName) ? '' : logAnalytics.id
136 |   }
137 | }
138 |
139 | // Let the cluster identity returned by the managed-cluster module pull images from the registry
140 | module containerRegistryAccess '../security/registry-access.bicep' = {
141 |   name: 'cluster-container-registry-access'
142 |   params: {
143 |     principalId: managedCluster.outputs.clusterIdentity.objectId
144 |     containerRegistryName: containerRegistry.outputs.name
145 |   }
146 | }
147 |
148 | // Grant the same cluster identity access to the Key Vault
149 | module clusterKeyVaultAccess '../security/keyvault-access.bicep' = {
150 |   name: 'cluster-keyvault-access'
151 |   params: {
152 |     principalId: managedCluster.outputs.clusterIdentity.objectId
153 |     keyVaultName: keyVaultName
154 |   }
155 | }
156 |
157 | // Settings shared by every node pool; merged beneath the preset or custom spec
158 | var nodePoolBase = {
159 |   type: 'VirtualMachineScaleSets'
160 |   osType: 'Linux'
161 |   maxPods: 30
162 |   upgradeSettings: {
163 |     maxSurge: '33%'
164 |   }
165 | }
166 |
167 | var nodePoolPresets = { // looked up by systemPoolType / agentPoolType; there is no 'Custom' entry
168 |   CostOptimised: {
169 |     vmSize: 'Standard_B4ms'
170 |     enableAutoScaling: true
171 |     count: 1
172 |     minCount: 1
173 |     maxCount: 3
174 |     availabilityZones: []
175 |   }
176 |   Standard: {
177 |     vmSize: 'Standard_DS2_v2'
178 |     enableAutoScaling: true
179 |     count: 3
180 |     minCount: 3
181 |     maxCount: 5
182 |     availabilityZones: [
183 |       '1'
184 |       '2'
185 |       '3'
186 |     ]
187 |   }
188 |   HighSpec: {
189 |     vmSize: 'Standard_D4s_v3'
190 |     enableAutoScaling: true
191 |     count: 3
192 |     minCount: 3
193 |     maxCount: 5
194 |     availabilityZones: [
195 |       '1'
196 |       '2'
197 |       '3'
198 |     ]
199 |   }
200 | }
201 |
202 | // Module outputs: cluster values come from the managed-cluster module, registry values from the container-registry module
203 | @description('The resource name of the AKS cluster')
204 | output clusterName string = managedCluster.outputs.clusterName
205 |
206 | @description('The AKS cluster identity')
207 | output clusterIdentity object = managedCluster.outputs.clusterIdentity
208 |
209 | @description('The resource name of the ACR')
210 | output containerRegistryName string = containerRegistry.outputs.name
211 |
212 | @description('The login server for the container registry')
213 | output containerRegistryLoginServer string = containerRegistry.outputs.loginServer
214 |
--------------------------------------------------------------------------------
/infra/core/host/appservice-appsettings.bicep:
--------------------------------------------------------------------------------
1 | @description('The name of the app service resource within the current resource group scope')
2 | param name string
3 |
4 | @description('The app settings to be applied to the app service')
5 | param appSettings object
6 |
7 | resource appService 'Microsoft.Web/sites@2022-03-01' existing = { // lookup only; the site must already exist
8 |   name: name
9 | }
10 |
11 | resource settings 'Microsoft.Web/sites/config@2022-03-01' = {
12 |   parent: appService
13 |   name: 'appsettings'
14 |   properties: appSettings // the object is written as the site's 'appsettings' config
15 | }
16 |
--------------------------------------------------------------------------------
/infra/core/host/appservice.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | // Reference Properties
6 | param applicationInsightsName string = '' // when set, the component's connection string is added to the app settings
7 | param appServicePlanId string
8 | param keyVaultName string = '' // when set, the vault URI is added to the app settings
9 | param managedIdentity bool = !empty(keyVaultName) // defaults to a system-assigned identity whenever a Key Vault is named
10 |
11 | // Runtime Properties
12 | @allowed([
13 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom'
14 | ])
15 | param runtimeName string
16 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}'
17 | param runtimeVersion string
18 |
19 | // Microsoft.Web/sites Properties
20 | param kind string = 'app,linux'
21 |
22 | // Microsoft.Web/sites/config
23 | param allowedOrigins array = [] // merged with the two Azure portal origins for CORS
24 | param alwaysOn bool = true
25 | param appCommandLine string = ''
26 | param appSettings object = {}
27 | param clientAffinityEnabled bool = false
28 | param enableOryxBuild bool = contains(kind, 'linux') // on by default for kinds containing 'linux'
29 | param functionAppScaleLimit int = -1 // -1 means "not set"; the site receives null
30 | param linuxFxVersion string = runtimeNameAndVersion
31 | param minimumElasticInstanceCount int = -1 // -1 means "not set"; the site receives null
32 | param numberOfWorkers int = -1 // -1 means "not set"; the site receives null
33 | param scmDoBuildDuringDeployment bool = false
34 | param use32BitWorkerProcess bool = false
35 | param ftpsState string = 'FtpsOnly'
36 | param healthCheckPath string = ''
37 |
38 | resource appService 'Microsoft.Web/sites@2022-03-01' = {
39 |   name: name
40 |   kind: kind
41 |   location: location
42 |   tags: tags
43 |   identity: { type: managedIdentity ? 'SystemAssigned' : 'None' }
44 |
45 |   properties: {
46 |     serverFarmId: appServicePlanId
47 |     httpsOnly: true
48 |     clientAffinityEnabled: clientAffinityEnabled
49 |     siteConfig: {
50 |       linuxFxVersion: linuxFxVersion
51 |       appCommandLine: appCommandLine
52 |       alwaysOn: alwaysOn
53 |       use32BitWorkerProcess: use32BitWorkerProcess
54 |       healthCheckPath: healthCheckPath
55 |       minTlsVersion: '1.2'
56 |       ftpsState: ftpsState
57 |       numberOfWorkers: numberOfWorkers == -1 ? null : numberOfWorkers // -1 is the "not set" sentinel for these three counters
58 |       minimumElasticInstanceCount: minimumElasticInstanceCount == -1 ? null : minimumElasticInstanceCount
59 |       functionAppScaleLimit: functionAppScaleLimit == -1 ? null : functionAppScaleLimit
60 |       cors: {
61 |         allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins)
62 |       }
63 |     }
64 |   }
65 |
66 |   resource configLogs 'config' = { // file-system logging for application and HTTP logs
67 |     name: 'logs'
68 |     properties: {
69 |       httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } }
70 |       applicationLogs: { fileSystem: { level: 'Verbose' } }
71 |       failedRequestsTracing: { enabled: true }
72 |       detailedErrorMessages: { enabled: true }
73 |     }
74 |   }
75 |
76 |   resource basicPublishingCredentialsPoliciesFtp 'basicPublishingCredentialsPolicies' = { // sets `allow: false` on the 'ftp' policy
77 |     name: 'ftp'
78 |     location: location
79 |     properties: {
80 |       allow: false
81 |     }
82 |   }
83 |
84 |   resource basicPublishingCredentialsPoliciesScm 'basicPublishingCredentialsPolicies' = { // sets `allow: false` on the 'scm' policy
85 |     name: 'scm'
86 |     location: location
87 |     properties: {
88 |       allow: false
89 |     }
90 |   }
91 | }
92 |
93 | module config 'appservice-appsettings.bicep' = { // always deployed, so the generated settings below are applied even when `appSettings` is empty
94 |   name: '${name}-appSettings'
95 |   params: {
96 |     name: appService.name
97 |     appSettings: union(appSettings, // caller-supplied settings first, then the generated keys
98 |       {
99 |         SCM_DO_BUILD_DURING_DEPLOYMENT: string(scmDoBuildDuringDeployment)
100 |         ENABLE_ORYX_BUILD: string(enableOryxBuild)
101 |       },
102 |       !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {},
103 |       !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {})
104 |   }
105 | }
106 |
107 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!empty(keyVaultName)) { // lookup only, when a vault name is given
108 |   name: keyVaultName
109 | }
110 |
111 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { // lookup only, when a component name is given
112 |   name: applicationInsightsName
113 | }
114 |
115 | output identityPrincipalId string = !managedIdentity ? '' : appService.identity.principalId // empty when no managed identity is assigned
116 | output name string = appService.name
117 | output uri string = 'https://${appService.properties.defaultHostName}'
118 |
--------------------------------------------------------------------------------
/infra/core/host/appserviceplan.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param kind string = ''
6 | param reserved bool = true
7 | param sku object // passed through unchanged as the plan's sku
8 |
9 | resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = {
10 |   name: name
11 |   kind: kind
12 |   location: location
13 |   tags: tags
14 |   sku: sku
15 |   properties: {
16 |     reserved: reserved
17 |   }
18 | }
19 |
20 | output id string = appServicePlan.id
21 | output name string = appServicePlan.name
22 |
--------------------------------------------------------------------------------
/infra/core/host/container-app-upsert.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param containerAppsEnvironmentName string
6 | param containerName string = 'main'
7 | param containerRegistryName string
8 |
9 | @description('Minimum number of replicas to run')
10 | @minValue(1)
11 | param containerMinReplicas int = 1
12 | @description('Maximum number of replicas to run')
13 | @minValue(1)
14 | param containerMaxReplicas int = 10
15 |
16 | param secrets array = []
17 | param env array = []
18 | param external bool = true
19 | param targetPort int = 80
20 | param exists bool // when true, the image of the already-deployed app is read and passed on; otherwise an empty image name is passed
21 |
22 | @description('User assigned identity name')
23 | param identityName string
24 |
25 | @description('Enabled Ingress for container app')
26 | param ingressEnabled bool = true
27 |
28 | // Dapr Options
29 | @description('Enable Dapr')
30 | param daprEnabled bool = false
31 | @description('Dapr app ID')
32 | param daprAppId string = containerName // defaults to the container name
33 | @allowed([ 'http', 'grpc' ])
34 | @description('Protocol used by Dapr to connect to the app, e.g. http or grpc')
35 | param daprAppProtocol string = 'http'
36 |
37 | @description('CPU cores allocated to a single container instance, e.g. 0.5')
38 | param containerCpuCoreCount string = '0.5'
39 |
40 | @description('Memory allocated to a single container instance, e.g. 1Gi')
41 | param containerMemory string = '1.0Gi'
42 |
43 | resource existingApp 'Microsoft.App/containerApps@2022-03-01' existing = if (exists) { // lookup of the current app, used only to read its container image
44 | name: name
45 | }
46 |
47 | module app 'container-app.bicep' = { // forwards every parameter to container-app.bicep, adding only the image name
48 |   name: '${deployment().name}-update'
49 |   params: {
50 |     name: name
51 |     location: location
52 |     tags: tags
53 |     identityName: identityName
54 |     containerAppsEnvironmentName: containerAppsEnvironmentName
55 |     containerRegistryName: containerRegistryName
56 |     containerName: containerName
57 |     imageName: !exists ? '' : existingApp.properties.template.containers[0].image // reuse the image of the existing app's first container
58 |     containerCpuCoreCount: containerCpuCoreCount
59 |     containerMemory: containerMemory
60 |     containerMinReplicas: containerMinReplicas
61 |     containerMaxReplicas: containerMaxReplicas
62 |     env: env
63 |     secrets: secrets
64 |     ingressEnabled: ingressEnabled
65 |     external: external
66 |     targetPort: targetPort
67 |     daprEnabled: daprEnabled
68 |     daprAppId: daprAppId
69 |     daprAppProtocol: daprAppProtocol
70 |   }
71 | }
72 |
73 | output defaultDomain string = app.outputs.defaultDomain // all four outputs are passed through from the container-app module
74 | output imageName string = app.outputs.imageName
75 | output name string = app.outputs.name
76 | output uri string = app.outputs.uri
77 |
--------------------------------------------------------------------------------
/infra/core/host/container-app.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param containerAppsEnvironmentName string
6 | param containerName string = 'main'
7 | param containerRegistryName string
8 |
9 | @description('Minimum number of replicas to run')
10 | @minValue(1)
11 | param containerMinReplicas int = 1
12 | @description('Maximum number of replicas to run')
13 | @minValue(1)
14 | param containerMaxReplicas int = 10
15 |
16 | param secrets array = []
17 | param env array = []
18 | param external bool = true
19 | param imageName string // when empty, the app is created with the public hello-world sample image
20 | param targetPort int = 80 // ingress target port; also the Dapr app port when ingress is enabled
21 |
22 | @description('User assigned identity name')
23 | param identityName string
24 |
25 | @description('Enabled Ingress for container app')
26 | param ingressEnabled bool = true // when false, the ingress configuration is null
27 |
28 | // Dapr Options
29 | @description('Enable Dapr')
30 | param daprEnabled bool = false
31 | @description('Dapr app ID')
32 | param daprAppId string = containerName // defaults to the container name
33 | @allowed([ 'http', 'grpc' ])
34 | @description('Protocol used by Dapr to connect to the app, e.g. http or grpc')
35 | param daprAppProtocol string = 'http'
36 |
37 | @description('CPU cores allocated to a single container instance, e.g. 0.5')
38 | param containerCpuCoreCount string = '0.5' // a string that is converted with json() where it is used
39 |
40 | @description('Memory allocated to a single container instance, e.g. 1Gi')
41 | param containerMemory string = '1.0Gi'
42 |
43 | resource userIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = { // lookup only; the identity must already exist
44 |   name: identityName
45 | }
46 |
47 | module containerRegistryAccess '../security/registry-access.bicep' = { // registry access for the identity; the app depends on this module
48 |   name: '${deployment().name}-registry-access'
49 |   params: {
50 |     principalId: userIdentity.properties.principalId
51 |     containerRegistryName: containerRegistryName
52 |   }
53 | }
54 |
55 | resource app 'Microsoft.App/containerApps@2022-03-01' = {
56 |   name: name
57 |   location: location
58 |   tags: tags
59 |   // It is critical that the identity is granted ACR pull access before the app is created,
60 |   // otherwise the container app will throw a provision error.
61 |   // This also forces us to use a user assigned managed identity, since there would be no way to
62 |   // provide the system assigned identity with the ACR pull access before the app is created.
63 |   dependsOn: [ containerRegistryAccess ]
64 |   identity: {
65 |     type: 'UserAssigned'
66 |     userAssignedIdentities: { '${userIdentity.id}': {} }
67 |   }
68 |   properties: {
69 |     managedEnvironmentId: containerAppsEnvironment.id
70 |     configuration: {
71 |       activeRevisionsMode: 'single'
72 |       ingress: ingressEnabled ? {
73 |         external: external
74 |         targetPort: targetPort
75 |         transport: 'auto'
76 |       } : null
77 |       dapr: daprEnabled ? {
78 |         enabled: true
79 |         appId: daprAppId
80 |         appProtocol: daprAppProtocol
81 |         appPort: ingressEnabled ? targetPort : 0 // without ingress there is no target port to hand to Dapr
82 |       } : { enabled: false }
83 |       secrets: secrets
84 |       registries: [
85 |         {
86 |           server: containerRegistry.properties.loginServer // read from the registry itself rather than assuming the public-cloud '.azurecr.io' suffix
87 |           identity: userIdentity.id
88 |         }
89 |       ]
90 |     }
91 |     template: {
92 |       containers: [
93 |         {
94 |           image: !empty(imageName) ? imageName : 'mcr.microsoft.com/azuredocs/containerapps-helloworld:latest' // sample image when no image name is given
95 |           name: containerName
96 |           env: env
97 |           resources: {
98 |             cpu: json(containerCpuCoreCount) // the CPU count arrives as a string and is converted with json()
99 |             memory: containerMemory
100 |           }
101 |         }
102 |       ]
103 |       scale: {
104 |         minReplicas: containerMinReplicas
105 |         maxReplicas: containerMaxReplicas
106 |       }
107 |     }
108 |   }
109 | }
110 |
111 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' existing = { // lookup only; supplies the environment id and default domain
112 | name: containerAppsEnvironmentName
113 | }
114 |
115 | // 2022-02-01-preview needed for anonymousPullEnabled
116 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' existing = { // lookup only; used to build the registry server name
117 | name: containerRegistryName
118 | }
119 |
120 | output defaultDomain string = containerAppsEnvironment.properties.defaultDomain
121 | output imageName string = imageName // echoes the input; empty when the sample image was used
122 | output name string = app.name
123 | output uri string = ingressEnabled ? 'https://${app.properties.configuration.ingress.fqdn}' : '' // ingress is null when disabled, so there is no FQDN to read
124 |
--------------------------------------------------------------------------------
/infra/core/host/container-apps-environment.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param daprEnabled bool = false
6 | param logAnalyticsWorkspaceName string
7 | param applicationInsightsName string = ''
8 |
9 | resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = { // lookup only; supplies customer id and shared key
10 |   name: logAnalyticsWorkspaceName
11 | }
12 |
13 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (daprEnabled && !empty(applicationInsightsName)) {
14 |   name: applicationInsightsName
15 | }
16 |
17 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' = {
18 |   name: name
19 |   location: location
20 |   tags: tags
21 |   properties: {
22 |     // The instrumentation key is only supplied when Dapr is enabled and an Application Insights name is given
23 |     daprAIInstrumentationKey: (!daprEnabled || empty(applicationInsightsName)) ? '' : applicationInsights.properties.InstrumentationKey
24 |     appLogsConfiguration: {
25 |       destination: 'log-analytics'
26 |       logAnalyticsConfiguration: {
27 |         sharedKey: logAnalyticsWorkspace.listKeys().primarySharedKey
28 |         customerId: logAnalyticsWorkspace.properties.customerId
29 |       }
30 |     }
31 |   }
32 | }
33 | output defaultDomain string = containerAppsEnvironment.properties.defaultDomain
34 | output name string = containerAppsEnvironment.name
35 |
--------------------------------------------------------------------------------
/infra/core/host/container-apps.bicep:
--------------------------------------------------------------------------------
1 | param name string // used only as the prefix of the two module deployment names
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param containerAppsEnvironmentName string
6 | param containerRegistryName string
7 | param logAnalyticsWorkspaceName string
8 | param applicationInsightsName string = ''
9 |
10 | module containerRegistry 'container-registry.bicep' = {
11 |   name: '${name}-container-registry'
12 |   params: {
13 |     name: containerRegistryName
14 |     tags: tags
15 |     location: location
16 |   }
17 | }
18 |
19 | module containerAppsEnvironment 'container-apps-environment.bicep' = {
20 |   name: '${name}-container-apps-environment'
21 |   params: {
22 |     name: containerAppsEnvironmentName
23 |     tags: tags
24 |     location: location
25 |     applicationInsightsName: applicationInsightsName
26 |     logAnalyticsWorkspaceName: logAnalyticsWorkspaceName
27 |   }
28 | }
29 |
30 | output defaultDomain string = containerAppsEnvironment.outputs.defaultDomain
31 | output environmentName string = containerAppsEnvironment.outputs.name
32 | output registryLoginServer string = containerRegistry.outputs.loginServer
33 | output registryName string = containerRegistry.outputs.name
34 |
--------------------------------------------------------------------------------
/infra/core/host/container-registry.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param adminUserEnabled bool = true // NOTE(review): admin user is on by default; confirm it is needed where pulls use a managed identity
6 | param anonymousPullEnabled bool = false
7 | param dataEndpointEnabled bool = false
8 | param encryption object = {
9 | status: 'disabled'
10 | }
11 | param networkRuleBypassOptions string = 'AzureServices'
12 | param publicNetworkAccess string = 'Enabled'
13 | param sku object = {
14 | name: 'Basic'
15 | }
16 | param zoneRedundancy string = 'Disabled'
17 |
18 | @description('The log analytics workspace id used for logging & monitoring')
19 | param workspaceId string = '' // diagnostic settings are deployed only when this is non-empty
20 |
21 | // The 2022-02-01-preview API version is required for the anonymousPullEnabled property
22 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = {
23 |   name: name
24 |   location: location
25 |   tags: tags
26 |   sku: sku
27 |   properties: {
28 |     adminUserEnabled: adminUserEnabled
29 |     anonymousPullEnabled: anonymousPullEnabled
30 |     publicNetworkAccess: publicNetworkAccess
31 |     networkRuleBypassOptions: networkRuleBypassOptions
32 |     dataEndpointEnabled: dataEndpointEnabled
33 |     zoneRedundancy: zoneRedundancy
34 |     encryption: encryption
35 |   }
36 | }
37 |
38 | // Diagnostic settings are declared inline rather than in a shared module.
39 | // TODO: move them into their own module once https://github.com/Azure/bicep/issues/622 is resolved
40 | // (a `resource` scope cannot be passed in, and resource types cannot use string interpolation).
41 | resource diagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = if (!empty(workspaceId)) {
42 |   scope: containerRegistry
43 |   name: 'registry-diagnostics'
44 |   properties: {
45 |     workspaceId: workspaceId
46 |     metrics: [
47 |       {
48 |         category: 'AllMetrics'
49 |         timeGrain: 'PT1M'
50 |         enabled: true
51 |       }
52 |     ]
53 |     logs: [
54 |       {
55 |         enabled: true
56 |         category: 'ContainerRegistryRepositoryEvents'
57 |       }
58 |       {
59 |         enabled: true
60 |         category: 'ContainerRegistryLoginEvents'
61 |       }
62 |     ]
63 |   }
64 | }
65 |
66 | output loginServer string = containerRegistry.properties.loginServer // login server reported by the registry resource
67 | output name string = containerRegistry.name
68 |
--------------------------------------------------------------------------------
/infra/core/host/functions.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | // Reference Properties
6 | param applicationInsightsName string = ''
7 | param appServicePlanId string
8 | param keyVaultName string = ''
9 | param managedIdentity bool = !empty(keyVaultName) // defaults to true whenever a Key Vault is named
10 | param storageAccountName string // existing storage account whose key is used to build AzureWebJobsStorage
11 |
12 | // Runtime Properties
13 | @allowed([
14 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom'
15 | ])
16 | param runtimeName string // also written to the FUNCTIONS_WORKER_RUNTIME app setting
17 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}'
18 | param runtimeVersion string
19 |
20 | // Function Settings
21 | @allowed([
22 | '~4', '~3', '~2', '~1'
23 | ])
24 | param extensionVersion string = '~4' // written to the FUNCTIONS_EXTENSION_VERSION app setting
25 |
26 | // Microsoft.Web/sites Properties
27 | param kind string = 'functionapp,linux'
28 |
29 | // Microsoft.Web/sites/config
30 | param allowedOrigins array = []
31 | param alwaysOn bool = true
32 | param appCommandLine string = ''
33 | param appSettings object = {}
34 | param clientAffinityEnabled bool = false
35 | param enableOryxBuild bool = contains(kind, 'linux') // on by default for kinds containing 'linux'
36 | param functionAppScaleLimit int = -1
37 | param linuxFxVersion string = runtimeNameAndVersion
38 | param minimumElasticInstanceCount int = -1
39 | param numberOfWorkers int = -1
40 | param scmDoBuildDuringDeployment bool = true
41 | param use32BitWorkerProcess bool = false
42 |
43 | module functions 'appservice.bicep' = { // the function app is an App Service site with function-specific app settings merged in
44 |   name: '${name}-functions'
45 |   params: {
46 |     name: name
47 |     kind: kind
48 |     location: location
49 |     tags: tags
50 |     appServicePlanId: appServicePlanId
51 |     applicationInsightsName: applicationInsightsName
52 |     keyVaultName: keyVaultName
53 |     managedIdentity: managedIdentity
54 |     runtimeName: runtimeName
55 |     runtimeVersion: runtimeVersion
56 |     runtimeNameAndVersion: runtimeNameAndVersion
57 |     linuxFxVersion: linuxFxVersion
58 |     appCommandLine: appCommandLine
59 |     appSettings: union(appSettings, {
60 |       FUNCTIONS_WORKER_RUNTIME: runtimeName
61 |       FUNCTIONS_EXTENSION_VERSION: extensionVersion
62 |       AzureWebJobsStorage: 'DefaultEndpointsProtocol=https;AccountName=${storage.name};AccountKey=${storage.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}'
63 |     })
64 |     allowedOrigins: allowedOrigins
65 |     alwaysOn: alwaysOn
66 |     clientAffinityEnabled: clientAffinityEnabled
67 |     enableOryxBuild: enableOryxBuild
68 |     scmDoBuildDuringDeployment: scmDoBuildDuringDeployment
69 |     functionAppScaleLimit: functionAppScaleLimit
70 |     minimumElasticInstanceCount: minimumElasticInstanceCount
71 |     numberOfWorkers: numberOfWorkers
72 |     use32BitWorkerProcess: use32BitWorkerProcess
73 |   }
74 | }
75 |
76 | resource storage 'Microsoft.Storage/storageAccounts@2021-09-01' existing = {
77 | name: storageAccountName
78 | }
79 |
80 | output identityPrincipalId string = managedIdentity ? functions.outputs.identityPrincipalId : ''
81 | output name string = functions.outputs.name
82 | output uri string = functions.outputs.uri
83 |
--------------------------------------------------------------------------------
/infra/core/host/staticwebapp.bicep:
--------------------------------------------------------------------------------
1 | // Deploys an Azure Static Web App and exposes its name and HTTPS URI.
2 |
3 | param name string
4 | param tags object = {}
5 | param location string = resourceGroup().location
6 |
7 | // SKU of the static site; Free tier unless the caller overrides it.
8 | param sku object = {
9 |   tier: 'Free'
10 |   name: 'Free'
11 | }
12 |
13 | resource staticSite 'Microsoft.Web/staticSites@2022-03-01' = {
14 |   name: name
15 |   location: location
16 |   sku: sku
17 |   tags: tags
18 |   properties: {
19 |     provider: 'Custom'
20 |   }
21 | }
22 |
23 | // URI is the site's default hostname prefixed with the https scheme.
24 | output uri string = 'https://${staticSite.properties.defaultHostname}'
25 | output name string = staticSite.name
26 |
--------------------------------------------------------------------------------
/infra/core/monitor/applicationinsights-dashboard.bicep:
--------------------------------------------------------------------------------
1 | param name string // Name given to the Microsoft.Portal/dashboards resource.
2 | param applicationInsightsName string // Name of the existing Application Insights component the dashboard parts point at.
3 | param location string = resourceGroup().location // Defaults to the resource group's location.
4 | param tags object = {} // Tags applied to the dashboard resource.
5 |
6 | // Uses API version 2020-09-01-preview because it is the latest valid version of Microsoft.Portal/dashboards.
7 | resource applicationInsightsDashboard 'Microsoft.Portal/dashboards@2020-09-01-preview' = {
8 | name: name
9 | location: location
10 | tags: tags
11 | properties: {
12 | lenses: [
13 | {
14 | order: 0
15 | parts: [
16 | {
17 | position: {
18 | x: 0
19 | y: 0
20 | colSpan: 2
21 | rowSpan: 1
22 | }
23 | metadata: {
24 | inputs: [
25 | {
26 | name: 'id'
27 | value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
28 | }
29 | {
30 | name: 'Version'
31 | value: '1.0'
32 | }
33 | ]
34 | #disable-next-line BCP036
35 | type: 'Extension/AppInsightsExtension/PartType/AspNetOverviewPinnedPart'
36 | asset: {
37 | idInputName: 'id'
38 | type: 'ApplicationInsights'
39 | }
40 | defaultMenuItemId: 'overview'
41 | }
42 | }
43 | {
44 | position: {
45 | x: 2
46 | y: 0
47 | colSpan: 1
48 | rowSpan: 1
49 | }
50 | metadata: {
51 | inputs: [
52 | {
53 | name: 'ComponentId'
54 | value: {
55 | Name: applicationInsights.name
56 | SubscriptionId: subscription().subscriptionId
57 | ResourceGroup: resourceGroup().name
58 | }
59 | }
60 | {
61 | name: 'Version'
62 | value: '1.0'
63 | }
64 | ]
65 | #disable-next-line BCP036
66 | type: 'Extension/AppInsightsExtension/PartType/ProactiveDetectionAsyncPart'
67 | asset: {
68 | idInputName: 'ComponentId'
69 | type: 'ApplicationInsights'
70 | }
71 | defaultMenuItemId: 'ProactiveDetection'
72 | }
73 | }
74 | {
75 | position: {
76 | x: 3
77 | y: 0
78 | colSpan: 1
79 | rowSpan: 1
80 | }
81 | metadata: {
82 | inputs: [
83 | {
84 | name: 'ComponentId'
85 | value: {
86 | Name: applicationInsights.name
87 | SubscriptionId: subscription().subscriptionId
88 | ResourceGroup: resourceGroup().name
89 | }
90 | }
91 | {
92 | name: 'ResourceId'
93 | value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
94 | }
95 | ]
96 | #disable-next-line BCP036
97 | type: 'Extension/AppInsightsExtension/PartType/QuickPulseButtonSmallPart'
98 | asset: {
99 | idInputName: 'ComponentId'
100 | type: 'ApplicationInsights'
101 | }
102 | }
103 | }
104 | {
105 | position: {
106 | x: 4
107 | y: 0
108 | colSpan: 1
109 | rowSpan: 1
110 | }
111 | metadata: {
112 | inputs: [
113 | {
114 | name: 'ComponentId'
115 | value: {
116 | Name: applicationInsights.name
117 | SubscriptionId: subscription().subscriptionId
118 | ResourceGroup: resourceGroup().name
119 | }
120 | }
121 | {
122 | name: 'TimeContext'
123 | value: {
124 | durationMs: 86400000
125 | endTime: null
126 | createdTime: '2018-05-04T01:20:33.345Z'
127 | isInitialTime: true
128 | grain: 1
129 | useDashboardTimeRange: false
130 | }
131 | }
132 | {
133 | name: 'Version'
134 | value: '1.0'
135 | }
136 | ]
137 | #disable-next-line BCP036
138 | type: 'Extension/AppInsightsExtension/PartType/AvailabilityNavButtonPart'
139 | asset: {
140 | idInputName: 'ComponentId'
141 | type: 'ApplicationInsights'
142 | }
143 | }
144 | }
145 | {
146 | position: {
147 | x: 5
148 | y: 0
149 | colSpan: 1
150 | rowSpan: 1
151 | }
152 | metadata: {
153 | inputs: [
154 | {
155 | name: 'ComponentId'
156 | value: {
157 | Name: applicationInsights.name
158 | SubscriptionId: subscription().subscriptionId
159 | ResourceGroup: resourceGroup().name
160 | }
161 | }
162 | {
163 | name: 'TimeContext'
164 | value: {
165 | durationMs: 86400000
166 | endTime: null
167 | createdTime: '2018-05-08T18:47:35.237Z'
168 | isInitialTime: true
169 | grain: 1
170 | useDashboardTimeRange: false
171 | }
172 | }
173 | {
174 | name: 'ConfigurationId'
175 | value: '78ce933e-e864-4b05-a27b-71fd55a6afad'
176 | }
177 | ]
178 | #disable-next-line BCP036
179 | type: 'Extension/AppInsightsExtension/PartType/AppMapButtonPart'
180 | asset: {
181 | idInputName: 'ComponentId'
182 | type: 'ApplicationInsights'
183 | }
184 | }
185 | }
186 | {
187 | position: {
188 | x: 0
189 | y: 1
190 | colSpan: 3
191 | rowSpan: 1
192 | }
193 | metadata: {
194 | inputs: []
195 | type: 'Extension/HubsExtension/PartType/MarkdownPart'
196 | settings: {
197 | content: {
198 | settings: {
199 | content: '# Usage'
200 | title: ''
201 | subtitle: ''
202 | }
203 | }
204 | }
205 | }
206 | }
207 | {
208 | position: {
209 | x: 3
210 | y: 1
211 | colSpan: 1
212 | rowSpan: 1
213 | }
214 | metadata: {
215 | inputs: [
216 | {
217 | name: 'ComponentId'
218 | value: {
219 | Name: applicationInsights.name
220 | SubscriptionId: subscription().subscriptionId
221 | ResourceGroup: resourceGroup().name
222 | }
223 | }
224 | {
225 | name: 'TimeContext'
226 | value: {
227 | durationMs: 86400000
228 | endTime: null
229 | createdTime: '2018-05-04T01:22:35.782Z'
230 | isInitialTime: true
231 | grain: 1
232 | useDashboardTimeRange: false
233 | }
234 | }
235 | ]
236 | #disable-next-line BCP036
237 | type: 'Extension/AppInsightsExtension/PartType/UsageUsersOverviewPart'
238 | asset: {
239 | idInputName: 'ComponentId'
240 | type: 'ApplicationInsights'
241 | }
242 | }
243 | }
244 | {
245 | position: {
246 | x: 4
247 | y: 1
248 | colSpan: 3
249 | rowSpan: 1
250 | }
251 | metadata: {
252 | inputs: []
253 | type: 'Extension/HubsExtension/PartType/MarkdownPart'
254 | settings: {
255 | content: {
256 | settings: {
257 | content: '# Reliability'
258 | title: ''
259 | subtitle: ''
260 | }
261 | }
262 | }
263 | }
264 | }
265 | {
266 | position: {
267 | x: 7
268 | y: 1
269 | colSpan: 1
270 | rowSpan: 1
271 | }
272 | metadata: {
273 | inputs: [
274 | {
275 | name: 'ResourceId'
276 | value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
277 | }
278 | {
279 | name: 'DataModel'
280 | value: {
281 | version: '1.0.0'
282 | timeContext: {
283 | durationMs: 86400000
284 | createdTime: '2018-05-04T23:42:40.072Z'
285 | isInitialTime: false
286 | grain: 1
287 | useDashboardTimeRange: false
288 | }
289 | }
290 | isOptional: true
291 | }
292 | {
293 | name: 'ConfigurationId'
294 | value: '8a02f7bf-ac0f-40e1-afe9-f0e72cfee77f'
295 | isOptional: true
296 | }
297 | ]
298 | #disable-next-line BCP036
299 | type: 'Extension/AppInsightsExtension/PartType/CuratedBladeFailuresPinnedPart'
300 | isAdapter: true
301 | asset: {
302 | idInputName: 'ResourceId'
303 | type: 'ApplicationInsights'
304 | }
305 | defaultMenuItemId: 'failures'
306 | }
307 | }
308 | {
309 | position: {
310 | x: 8
311 | y: 1
312 | colSpan: 3
313 | rowSpan: 1
314 | }
315 | metadata: {
316 | inputs: []
317 | type: 'Extension/HubsExtension/PartType/MarkdownPart'
318 | settings: {
319 | content: {
320 | settings: {
321 | content: '# Responsiveness\r\n'
322 | title: ''
323 | subtitle: ''
324 | }
325 | }
326 | }
327 | }
328 | }
329 | {
330 | position: {
331 | x: 11
332 | y: 1
333 | colSpan: 1
334 | rowSpan: 1
335 | }
336 | metadata: {
337 | inputs: [
338 | {
339 | name: 'ResourceId'
340 | value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
341 | }
342 | {
343 | name: 'DataModel'
344 | value: {
345 | version: '1.0.0'
346 | timeContext: {
347 | durationMs: 86400000
348 | createdTime: '2018-05-04T23:43:37.804Z'
349 | isInitialTime: false
350 | grain: 1
351 | useDashboardTimeRange: false
352 | }
353 | }
354 | isOptional: true
355 | }
356 | {
357 | name: 'ConfigurationId'
358 | value: '2a8ede4f-2bee-4b9c-aed9-2db0e8a01865'
359 | isOptional: true
360 | }
361 | ]
362 | #disable-next-line BCP036
363 | type: 'Extension/AppInsightsExtension/PartType/CuratedBladePerformancePinnedPart'
364 | isAdapter: true
365 | asset: {
366 | idInputName: 'ResourceId'
367 | type: 'ApplicationInsights'
368 | }
369 | defaultMenuItemId: 'performance'
370 | }
371 | }
372 | {
373 | position: {
374 | x: 12
375 | y: 1
376 | colSpan: 3
377 | rowSpan: 1
378 | }
379 | metadata: {
380 | inputs: []
381 | type: 'Extension/HubsExtension/PartType/MarkdownPart'
382 | settings: {
383 | content: {
384 | settings: {
385 | content: '# Browser'
386 | title: ''
387 | subtitle: ''
388 | }
389 | }
390 | }
391 | }
392 | }
393 | {
394 | position: {
395 | x: 15
396 | y: 1
397 | colSpan: 1
398 | rowSpan: 1
399 | }
400 | metadata: {
401 | inputs: [
402 | {
403 | name: 'ComponentId'
404 | value: {
405 | Name: applicationInsights.name
406 | SubscriptionId: subscription().subscriptionId
407 | ResourceGroup: resourceGroup().name
408 | }
409 | }
410 | {
411 | name: 'MetricsExplorerJsonDefinitionId'
412 | value: 'BrowserPerformanceTimelineMetrics'
413 | }
414 | {
415 | name: 'TimeContext'
416 | value: {
417 | durationMs: 86400000
418 | createdTime: '2018-05-08T12:16:27.534Z'
419 | isInitialTime: false
420 | grain: 1
421 | useDashboardTimeRange: false
422 | }
423 | }
424 | {
425 | name: 'CurrentFilter'
426 | value: {
427 | eventTypes: [
428 | 4
429 | 1
430 | 3
431 | 5
432 | 2
433 | 6
434 | 13
435 | ]
436 | typeFacets: {}
437 | isPermissive: false
438 | }
439 | }
440 | {
441 | name: 'id'
442 | value: {
443 | Name: applicationInsights.name
444 | SubscriptionId: subscription().subscriptionId
445 | ResourceGroup: resourceGroup().name
446 | }
447 | }
448 | {
449 | name: 'Version'
450 | value: '1.0'
451 | }
452 | ]
453 | #disable-next-line BCP036
454 | type: 'Extension/AppInsightsExtension/PartType/MetricsExplorerBladePinnedPart'
455 | asset: {
456 | idInputName: 'ComponentId'
457 | type: 'ApplicationInsights'
458 | }
459 | defaultMenuItemId: 'browser'
460 | }
461 | }
462 | {
463 | position: {
464 | x: 0
465 | y: 2
466 | colSpan: 4
467 | rowSpan: 3
468 | }
469 | metadata: {
470 | inputs: [
471 | {
472 | name: 'options'
473 | value: {
474 | chart: {
475 | metrics: [
476 | {
477 | resourceMetadata: {
478 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
479 | }
480 | name: 'sessions/count'
481 | aggregationType: 5
482 | namespace: 'microsoft.insights/components/kusto'
483 | metricVisualization: {
484 | displayName: 'Sessions'
485 | color: '#47BDF5'
486 | }
487 | }
488 | {
489 | resourceMetadata: {
490 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
491 | }
492 | name: 'users/count'
493 | aggregationType: 5
494 | namespace: 'microsoft.insights/components/kusto'
495 | metricVisualization: {
496 | displayName: 'Users'
497 | color: '#7E58FF'
498 | }
499 | }
500 | ]
501 | title: 'Unique sessions and users'
502 | visualization: {
503 | chartType: 2
504 | legendVisualization: {
505 | isVisible: true
506 | position: 2
507 | hideSubtitle: false
508 | }
509 | axisVisualization: {
510 | x: {
511 | isVisible: true
512 | axisType: 2
513 | }
514 | y: {
515 | isVisible: true
516 | axisType: 1
517 | }
518 | }
519 | }
520 | openBladeOnClick: {
521 | openBlade: true
522 | destinationBlade: {
523 | extensionName: 'HubsExtension'
524 | bladeName: 'ResourceMenuBlade'
525 | parameters: {
526 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
527 | menuid: 'segmentationUsers'
528 | }
529 | }
530 | }
531 | }
532 | }
533 | }
534 | {
535 | name: 'sharedTimeRange'
536 | isOptional: true
537 | }
538 | ]
539 | #disable-next-line BCP036
540 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
541 | settings: {}
542 | }
543 | }
544 | {
545 | position: {
546 | x: 4
547 | y: 2
548 | colSpan: 4
549 | rowSpan: 3
550 | }
551 | metadata: {
552 | inputs: [
553 | {
554 | name: 'options'
555 | value: {
556 | chart: {
557 | metrics: [
558 | {
559 | resourceMetadata: {
560 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
561 | }
562 | name: 'requests/failed'
563 | aggregationType: 7
564 | namespace: 'microsoft.insights/components'
565 | metricVisualization: {
566 | displayName: 'Failed requests'
567 | color: '#EC008C'
568 | }
569 | }
570 | ]
571 | title: 'Failed requests'
572 | visualization: {
573 | chartType: 3
574 | legendVisualization: {
575 | isVisible: true
576 | position: 2
577 | hideSubtitle: false
578 | }
579 | axisVisualization: {
580 | x: {
581 | isVisible: true
582 | axisType: 2
583 | }
584 | y: {
585 | isVisible: true
586 | axisType: 1
587 | }
588 | }
589 | }
590 | openBladeOnClick: {
591 | openBlade: true
592 | destinationBlade: {
593 | extensionName: 'HubsExtension'
594 | bladeName: 'ResourceMenuBlade'
595 | parameters: {
596 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
597 | menuid: 'failures'
598 | }
599 | }
600 | }
601 | }
602 | }
603 | }
604 | {
605 | name: 'sharedTimeRange'
606 | isOptional: true
607 | }
608 | ]
609 | #disable-next-line BCP036
610 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
611 | settings: {}
612 | }
613 | }
614 | {
615 | position: {
616 | x: 8
617 | y: 2
618 | colSpan: 4
619 | rowSpan: 3
620 | }
621 | metadata: {
622 | inputs: [
623 | {
624 | name: 'options'
625 | value: {
626 | chart: {
627 | metrics: [
628 | {
629 | resourceMetadata: {
630 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
631 | }
632 | name: 'requests/duration'
633 | aggregationType: 4
634 | namespace: 'microsoft.insights/components'
635 | metricVisualization: {
636 | displayName: 'Server response time'
637 | color: '#00BCF2'
638 | }
639 | }
640 | ]
641 | title: 'Server response time'
642 | visualization: {
643 | chartType: 2
644 | legendVisualization: {
645 | isVisible: true
646 | position: 2
647 | hideSubtitle: false
648 | }
649 | axisVisualization: {
650 | x: {
651 | isVisible: true
652 | axisType: 2
653 | }
654 | y: {
655 | isVisible: true
656 | axisType: 1
657 | }
658 | }
659 | }
660 | openBladeOnClick: {
661 | openBlade: true
662 | destinationBlade: {
663 | extensionName: 'HubsExtension'
664 | bladeName: 'ResourceMenuBlade'
665 | parameters: {
666 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
667 | menuid: 'performance'
668 | }
669 | }
670 | }
671 | }
672 | }
673 | }
674 | {
675 | name: 'sharedTimeRange'
676 | isOptional: true
677 | }
678 | ]
679 | #disable-next-line BCP036
680 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
681 | settings: {}
682 | }
683 | }
684 | {
685 | position: {
686 | x: 12
687 | y: 2
688 | colSpan: 4
689 | rowSpan: 3
690 | }
691 | metadata: {
692 | inputs: [
693 | {
694 | name: 'options'
695 | value: {
696 | chart: {
697 | metrics: [
698 | {
699 | resourceMetadata: {
700 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
701 | }
702 | name: 'browserTimings/networkDuration'
703 | aggregationType: 4
704 | namespace: 'microsoft.insights/components'
705 | metricVisualization: {
706 | displayName: 'Page load network connect time'
707 | color: '#7E58FF'
708 | }
709 | }
710 | {
711 | resourceMetadata: {
712 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
713 | }
714 | name: 'browserTimings/processingDuration'
715 | aggregationType: 4
716 | namespace: 'microsoft.insights/components'
717 | metricVisualization: {
718 | displayName: 'Client processing time'
719 | color: '#44F1C8'
720 | }
721 | }
722 | {
723 | resourceMetadata: {
724 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
725 | }
726 | name: 'browserTimings/sendDuration'
727 | aggregationType: 4
728 | namespace: 'microsoft.insights/components'
729 | metricVisualization: {
730 | displayName: 'Send request time'
731 | color: '#EB9371'
732 | }
733 | }
734 | {
735 | resourceMetadata: {
736 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
737 | }
738 | name: 'browserTimings/receiveDuration'
739 | aggregationType: 4
740 | namespace: 'microsoft.insights/components'
741 | metricVisualization: {
742 | displayName: 'Receiving response time'
743 | color: '#0672F1'
744 | }
745 | }
746 | ]
747 | title: 'Average page load time breakdown'
748 | visualization: {
749 | chartType: 3
750 | legendVisualization: {
751 | isVisible: true
752 | position: 2
753 | hideSubtitle: false
754 | }
755 | axisVisualization: {
756 | x: {
757 | isVisible: true
758 | axisType: 2
759 | }
760 | y: {
761 | isVisible: true
762 | axisType: 1
763 | }
764 | }
765 | }
766 | }
767 | }
768 | }
769 | {
770 | name: 'sharedTimeRange'
771 | isOptional: true
772 | }
773 | ]
774 | #disable-next-line BCP036
775 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
776 | settings: {}
777 | }
778 | }
779 | {
780 | position: {
781 | x: 0
782 | y: 5
783 | colSpan: 4
784 | rowSpan: 3
785 | }
786 | metadata: {
787 | inputs: [
788 | {
789 | name: 'options'
790 | value: {
791 | chart: {
792 | metrics: [
793 | {
794 | resourceMetadata: {
795 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
796 | }
797 | name: 'availabilityResults/availabilityPercentage'
798 | aggregationType: 4
799 | namespace: 'microsoft.insights/components'
800 | metricVisualization: {
801 | displayName: 'Availability'
802 | color: '#47BDF5'
803 | }
804 | }
805 | ]
806 | title: 'Average availability'
807 | visualization: {
808 | chartType: 3
809 | legendVisualization: {
810 | isVisible: true
811 | position: 2
812 | hideSubtitle: false
813 | }
814 | axisVisualization: {
815 | x: {
816 | isVisible: true
817 | axisType: 2
818 | }
819 | y: {
820 | isVisible: true
821 | axisType: 1
822 | }
823 | }
824 | }
825 | openBladeOnClick: {
826 | openBlade: true
827 | destinationBlade: {
828 | extensionName: 'HubsExtension'
829 | bladeName: 'ResourceMenuBlade'
830 | parameters: {
831 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
832 | menuid: 'availability'
833 | }
834 | }
835 | }
836 | }
837 | }
838 | }
839 | {
840 | name: 'sharedTimeRange'
841 | isOptional: true
842 | }
843 | ]
844 | #disable-next-line BCP036
845 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
846 | settings: {}
847 | }
848 | }
849 | {
850 | position: {
851 | x: 4
852 | y: 5
853 | colSpan: 4
854 | rowSpan: 3
855 | }
856 | metadata: {
857 | inputs: [
858 | {
859 | name: 'options'
860 | value: {
861 | chart: {
862 | metrics: [
863 | {
864 | resourceMetadata: {
865 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
866 | }
867 | name: 'exceptions/server'
868 | aggregationType: 7
869 | namespace: 'microsoft.insights/components'
870 | metricVisualization: {
871 | displayName: 'Server exceptions'
872 | color: '#47BDF5'
873 | }
874 | }
875 | {
876 | resourceMetadata: {
877 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
878 | }
879 | name: 'dependencies/failed'
880 | aggregationType: 7
881 | namespace: 'microsoft.insights/components'
882 | metricVisualization: {
883 | displayName: 'Dependency failures'
884 | color: '#7E58FF'
885 | }
886 | }
887 | ]
888 | title: 'Server exceptions and Dependency failures'
889 | visualization: {
890 | chartType: 2
891 | legendVisualization: {
892 | isVisible: true
893 | position: 2
894 | hideSubtitle: false
895 | }
896 | axisVisualization: {
897 | x: {
898 | isVisible: true
899 | axisType: 2
900 | }
901 | y: {
902 | isVisible: true
903 | axisType: 1
904 | }
905 | }
906 | }
907 | }
908 | }
909 | }
910 | {
911 | name: 'sharedTimeRange'
912 | isOptional: true
913 | }
914 | ]
915 | #disable-next-line BCP036
916 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
917 | settings: {}
918 | }
919 | }
920 | {
921 | position: {
922 | x: 8
923 | y: 5
924 | colSpan: 4
925 | rowSpan: 3
926 | }
927 | metadata: {
928 | inputs: [
929 | {
930 | name: 'options'
931 | value: {
932 | chart: {
933 | metrics: [
934 | {
935 | resourceMetadata: {
936 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
937 | }
938 | name: 'performanceCounters/processorCpuPercentage'
939 | aggregationType: 4
940 | namespace: 'microsoft.insights/components'
941 | metricVisualization: {
942 | displayName: 'Processor time'
943 | color: '#47BDF5'
944 | }
945 | }
946 | {
947 | resourceMetadata: {
948 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
949 | }
950 | name: 'performanceCounters/processCpuPercentage'
951 | aggregationType: 4
952 | namespace: 'microsoft.insights/components'
953 | metricVisualization: {
954 | displayName: 'Process CPU'
955 | color: '#7E58FF'
956 | }
957 | }
958 | ]
959 | title: 'Average processor and process CPU utilization'
960 | visualization: {
961 | chartType: 2
962 | legendVisualization: {
963 | isVisible: true
964 | position: 2
965 | hideSubtitle: false
966 | }
967 | axisVisualization: {
968 | x: {
969 | isVisible: true
970 | axisType: 2
971 | }
972 | y: {
973 | isVisible: true
974 | axisType: 1
975 | }
976 | }
977 | }
978 | }
979 | }
980 | }
981 | {
982 | name: 'sharedTimeRange'
983 | isOptional: true
984 | }
985 | ]
986 | #disable-next-line BCP036
987 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
988 | settings: {}
989 | }
990 | }
991 | {
992 | position: {
993 | x: 12
994 | y: 5
995 | colSpan: 4
996 | rowSpan: 3
997 | }
998 | metadata: {
999 | inputs: [
1000 | {
1001 | name: 'options'
1002 | value: {
1003 | chart: {
1004 | metrics: [
1005 | {
1006 | resourceMetadata: {
1007 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
1008 | }
1009 | name: 'exceptions/browser'
1010 | aggregationType: 7
1011 | namespace: 'microsoft.insights/components'
1012 | metricVisualization: {
1013 | displayName: 'Browser exceptions'
1014 | color: '#47BDF5'
1015 | }
1016 | }
1017 | ]
1018 | title: 'Browser exceptions'
1019 | visualization: {
1020 | chartType: 2
1021 | legendVisualization: {
1022 | isVisible: true
1023 | position: 2
1024 | hideSubtitle: false
1025 | }
1026 | axisVisualization: {
1027 | x: {
1028 | isVisible: true
1029 | axisType: 2
1030 | }
1031 | y: {
1032 | isVisible: true
1033 | axisType: 1
1034 | }
1035 | }
1036 | }
1037 | }
1038 | }
1039 | }
1040 | {
1041 | name: 'sharedTimeRange'
1042 | isOptional: true
1043 | }
1044 | ]
1045 | #disable-next-line BCP036
1046 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
1047 | settings: {}
1048 | }
1049 | }
1050 | {
1051 | position: {
1052 | x: 0
1053 | y: 8
1054 | colSpan: 4
1055 | rowSpan: 3
1056 | }
1057 | metadata: {
1058 | inputs: [
1059 | {
1060 | name: 'options'
1061 | value: {
1062 | chart: {
1063 | metrics: [
1064 | {
1065 | resourceMetadata: {
1066 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
1067 | }
1068 | name: 'availabilityResults/count'
1069 | aggregationType: 7
1070 | namespace: 'microsoft.insights/components'
1071 | metricVisualization: {
1072 | displayName: 'Availability test results count'
1073 | color: '#47BDF5'
1074 | }
1075 | }
1076 | ]
1077 | title: 'Availability test results count'
1078 | visualization: {
1079 | chartType: 2
1080 | legendVisualization: {
1081 | isVisible: true
1082 | position: 2
1083 | hideSubtitle: false
1084 | }
1085 | axisVisualization: {
1086 | x: {
1087 | isVisible: true
1088 | axisType: 2
1089 | }
1090 | y: {
1091 | isVisible: true
1092 | axisType: 1
1093 | }
1094 | }
1095 | }
1096 | }
1097 | }
1098 | }
1099 | {
1100 | name: 'sharedTimeRange'
1101 | isOptional: true
1102 | }
1103 | ]
1104 | #disable-next-line BCP036
1105 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
1106 | settings: {}
1107 | }
1108 | }
1109 | {
1110 | position: {
1111 | x: 4
1112 | y: 8
1113 | colSpan: 4
1114 | rowSpan: 3
1115 | }
1116 | metadata: {
1117 | inputs: [
1118 | {
1119 | name: 'options'
1120 | value: {
1121 | chart: {
1122 | metrics: [
1123 | {
1124 | resourceMetadata: {
1125 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
1126 | }
1127 | name: 'performanceCounters/processIOBytesPerSecond'
1128 | aggregationType: 4
1129 | namespace: 'microsoft.insights/components'
1130 | metricVisualization: {
1131 | displayName: 'Process IO rate'
1132 | color: '#47BDF5'
1133 | }
1134 | }
1135 | ]
1136 | title: 'Average process I/O rate'
1137 | visualization: {
1138 | chartType: 2
1139 | legendVisualization: {
1140 | isVisible: true
1141 | position: 2
1142 | hideSubtitle: false
1143 | }
1144 | axisVisualization: {
1145 | x: {
1146 | isVisible: true
1147 | axisType: 2
1148 | }
1149 | y: {
1150 | isVisible: true
1151 | axisType: 1
1152 | }
1153 | }
1154 | }
1155 | }
1156 | }
1157 | }
1158 | {
1159 | name: 'sharedTimeRange'
1160 | isOptional: true
1161 | }
1162 | ]
1163 | #disable-next-line BCP036
1164 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
1165 | settings: {}
1166 | }
1167 | }
1168 | {
1169 | position: {
1170 | x: 8
1171 | y: 8
1172 | colSpan: 4
1173 | rowSpan: 3
1174 | }
1175 | metadata: {
1176 | inputs: [
1177 | {
1178 | name: 'options'
1179 | value: {
1180 | chart: {
1181 | metrics: [
1182 | {
1183 | resourceMetadata: {
1184 | id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}'
1185 | }
1186 | name: 'performanceCounters/memoryAvailableBytes'
1187 | aggregationType: 4
1188 | namespace: 'microsoft.insights/components'
1189 | metricVisualization: {
1190 | displayName: 'Available memory'
1191 | color: '#47BDF5'
1192 | }
1193 | }
1194 | ]
1195 | title: 'Average available memory'
1196 | visualization: {
1197 | chartType: 2
1198 | legendVisualization: {
1199 | isVisible: true
1200 | position: 2
1201 | hideSubtitle: false
1202 | }
1203 | axisVisualization: {
1204 | x: {
1205 | isVisible: true
1206 | axisType: 2
1207 | }
1208 | y: {
1209 | isVisible: true
1210 | axisType: 1
1211 | }
1212 | }
1213 | }
1214 | }
1215 | }
1216 | }
1217 | {
1218 | name: 'sharedTimeRange'
1219 | isOptional: true
1220 | }
1221 | ]
1222 | #disable-next-line BCP036
1223 | type: 'Extension/HubsExtension/PartType/MonitorChartPart'
1224 | settings: {}
1225 | }
1226 | }
1227 | ]
1228 | }
1229 | ]
1230 | }
1231 | }
1232 | // Reference to an already-deployed Application Insights component; the dashboard above reads only its name.
1233 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = {
1234 | name: applicationInsightsName
1235 | }
1236 |
--------------------------------------------------------------------------------
/infra/core/monitor/applicationinsights.bicep:
--------------------------------------------------------------------------------
1 | // Creates a workspace-based Application Insights component and its companion portal dashboard.
2 |
3 | // Name of the Application Insights component.
4 | param name string
5 | // Name of the portal dashboard created alongside the component.
6 | param dashboardName string
7 | param location string = resourceGroup().location
8 | // Tags applied to the component and forwarded to the dashboard.
9 | param tags object = {}
10 |
11 | // Resource ID of the Log Analytics workspace that backs this component.
12 | param logAnalyticsWorkspaceId string
13 |
14 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = {
15 |   name: name
16 |   location: location
17 |   tags: tags
18 |   kind: 'web'
19 |   properties: {
20 |     Application_Type: 'web'
21 |     WorkspaceResourceId: logAnalyticsWorkspaceId
22 |   }
23 | }
24 |
25 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = {
26 |   name: 'application-insights-dashboard'
27 |   params: {
28 |     name: dashboardName
29 |     location: location
30 |     // Forward tags so the dashboard is tagged the same way as the component it visualizes.
31 |     tags: tags
32 |     applicationInsightsName: applicationInsights.name
33 |   }
34 | }
35 |
36 | output connectionString string = applicationInsights.properties.ConnectionString
37 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey
38 | output name string = applicationInsights.name
39 |
--------------------------------------------------------------------------------
/infra/core/monitor/loganalytics.bicep:
--------------------------------------------------------------------------------
1 | // Log Analytics workspace on the PerGB2018 SKU with 30-day retention.
2 |
3 | param name string
4 | param tags object = {}
5 | param location string = resourceGroup().location
6 |
7 | resource workspace 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = {
8 |   name: name
9 |   location: location
10 |   tags: tags
11 |   // any() relaxes Bicep's type checking for this properties object.
12 |   properties: any({
13 |     sku: {
14 |       name: 'PerGB2018'
15 |     }
16 |     retentionInDays: 30
17 |     features: {
18 |       searchVersion: 1
19 |     }
20 |   })
21 | }
22 |
23 | output name string = workspace.name
24 | output id string = workspace.id
25 |
--------------------------------------------------------------------------------
/infra/core/monitor/monitoring.bicep:
--------------------------------------------------------------------------------
1 | // Monitoring stack: a Log Analytics workspace plus an Application Insights component (and dashboard) bound to it.
2 |
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 | param logAnalyticsName string
6 | param applicationInsightsName string
7 | param applicationInsightsDashboardName string
8 |
9 | // Workspace first: its resource ID feeds the Application Insights module below.
10 | module workspace 'loganalytics.bicep' = {
11 |   name: 'loganalytics'
12 |   params: {
13 |     name: logAnalyticsName
14 |     tags: tags
15 |     location: location
16 |   }
17 | }
18 |
19 | module appInsights 'applicationinsights.bicep' = {
20 |   name: 'applicationinsights'
21 |   params: {
22 |     name: applicationInsightsName
23 |     dashboardName: applicationInsightsDashboardName
24 |     logAnalyticsWorkspaceId: workspace.outputs.id
25 |     location: location
26 |     tags: tags
27 |   }
28 | }
29 |
30 | // Log Analytics outputs
31 | output logAnalyticsWorkspaceId string = workspace.outputs.id
32 | output logAnalyticsWorkspaceName string = workspace.outputs.name
33 |
34 | // Application Insights outputs
35 | output applicationInsightsName string = appInsights.outputs.name
36 | output applicationInsightsConnectionString string = appInsights.outputs.connectionString
37 | output applicationInsightsInstrumentationKey string = appInsights.outputs.instrumentationKey
38 |
--------------------------------------------------------------------------------
/infra/core/networking/cdn-endpoint.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | @description('The name of the CDN profile resource')
6 | @minLength(1)
7 | param cdnProfileName string
8 |
9 | @description('Delivery policy rules')
10 | param deliveryPolicyRules array = []
11 |
12 | @description('The origin URL for the endpoint')
13 | @minLength(1)
14 | param originUrl string
15 |
16 | resource endpoint 'Microsoft.Cdn/profiles/endpoints@2022-05-01-preview' = {
17 | parent: cdnProfile
18 | name: name
19 | location: location
20 | tags: tags
21 | properties: {
22 | originHostHeader: originUrl
23 | isHttpAllowed: false
24 | isHttpsAllowed: true
25 | queryStringCachingBehavior: 'UseQueryString'
26 | optimizationType: 'GeneralWebDelivery'
27 | origins: [
28 | {
29 | name: replace(originUrl, '.', '-')
30 | properties: {
31 | hostName: originUrl
32 | originHostHeader: originUrl
33 | priority: 1
34 | weight: 1000
35 | enabled: true
36 | }
37 | }
38 | ]
39 | deliveryPolicy: {
40 | rules: deliveryPolicyRules
41 | }
42 | }
43 | }
44 |
45 | resource cdnProfile 'Microsoft.Cdn/profiles@2022-05-01-preview' existing = {
46 | name: cdnProfileName
47 | }
48 |
49 | output id string = endpoint.id
50 | output name string = endpoint.name
51 | output uri string = 'https://${endpoint.properties.hostName}'
52 |
--------------------------------------------------------------------------------
/infra/core/networking/cdn-profile.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | @description('The pricing tier of this CDN profile')
6 | @allowed([
7 | 'Custom_Verizon'
8 | 'Premium_AzureFrontDoor'
9 | 'Premium_Verizon'
10 | 'StandardPlus_955BandWidth_ChinaCdn'
11 | 'StandardPlus_AvgBandWidth_ChinaCdn'
12 | 'StandardPlus_ChinaCdn'
13 | 'Standard_955BandWidth_ChinaCdn'
14 | 'Standard_Akamai'
15 | 'Standard_AvgBandWidth_ChinaCdn'
16 | 'Standard_AzureFrontDoor'
17 | 'Standard_ChinaCdn'
18 | 'Standard_Microsoft'
19 | 'Standard_Verizon'
20 | ])
21 | param sku string = 'Standard_Microsoft'
22 |
23 | resource profile 'Microsoft.Cdn/profiles@2022-05-01-preview' = {
24 | name: name
25 | location: location
26 | tags: tags
27 | sku: {
28 | name: sku
29 | }
30 | }
31 |
32 | output id string = profile.id
33 | output name string = profile.name
34 |
--------------------------------------------------------------------------------
/infra/core/networking/cdn.bicep:
--------------------------------------------------------------------------------
1 | // Module to create a CDN profile with a single endpoint
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | @description('Name of the CDN endpoint resource')
6 | param cdnEndpointName string
7 |
8 | @description('Name of the CDN profile resource')
9 | param cdnProfileName string
10 |
11 | @description('Delivery policy rules')
12 | param deliveryPolicyRules array = []
13 |
14 | @description('Origin URL for the CDN endpoint')
15 | param originUrl string
16 |
17 | module cdnProfile 'cdn-profile.bicep' = {
18 | name: 'cdn-profile'
19 | params: {
20 | name: cdnProfileName
21 | location: location
22 | tags: tags
23 | }
24 | }
25 |
26 | module cdnEndpoint 'cdn-endpoint.bicep' = {
27 | name: 'cdn-endpoint'
28 | params: {
29 | name: cdnEndpointName
30 | location: location
31 | tags: tags
32 | cdnProfileName: cdnProfile.outputs.name
33 | originUrl: originUrl
34 | deliveryPolicyRules: deliveryPolicyRules
35 | }
36 | }
37 |
38 | output endpointName string = cdnEndpoint.outputs.name
39 | output endpointId string = cdnEndpoint.outputs.id
40 | output profileName string = cdnProfile.outputs.name
41 | output profileId string = cdnProfile.outputs.id
42 | output uri string = cdnEndpoint.outputs.uri
43 |
--------------------------------------------------------------------------------
/infra/core/search/search-services.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param sku object = {
6 | name: 'standard'
7 | }
8 |
9 | param authOptions object = {}
10 | param disableLocalAuth bool = false
11 | param disabledDataExfiltrationOptions array = []
12 | param encryptionWithCmk object = {
13 | enforcement: 'Unspecified'
14 | }
15 | @allowed([
16 | 'default'
17 | 'highDensity'
18 | ])
19 | param hostingMode string = 'default'
20 | param networkRuleSet object = {
21 | bypass: 'None'
22 | ipRules: []
23 | }
24 | param partitionCount int = 1
25 | @allowed([
26 | 'enabled'
27 | 'disabled'
28 | ])
29 | param publicNetworkAccess string = 'enabled'
30 | param replicaCount int = 1
31 | @allowed([
32 | 'disabled'
33 | 'free'
34 | 'standard'
35 | ])
36 | param semanticSearch string = 'disabled'
37 |
38 | resource search 'Microsoft.Search/searchServices@2021-04-01-preview' = {
39 | name: name
40 | location: location
41 | tags: tags
42 | identity: {
43 | type: 'SystemAssigned'
44 | }
45 | properties: {
46 | authOptions: authOptions
47 | disableLocalAuth: disableLocalAuth
48 | disabledDataExfiltrationOptions: disabledDataExfiltrationOptions
49 | encryptionWithCmk: encryptionWithCmk
50 | hostingMode: hostingMode
51 | networkRuleSet: networkRuleSet
52 | partitionCount: partitionCount
53 | publicNetworkAccess: publicNetworkAccess
54 | replicaCount: replicaCount
55 | semanticSearch: semanticSearch
56 | }
57 | sku: sku
58 | }
59 |
60 | output id string = search.id
61 | output endpoint string = 'https://${name}.search.windows.net/'
62 | output name string = search.name
63 |
--------------------------------------------------------------------------------
/infra/core/security/keyvault-access.bicep:
--------------------------------------------------------------------------------
1 | param name string = 'add'
2 |
3 | param keyVaultName string
4 | param permissions object = { secrets: [ 'get', 'list' ] }
5 | param principalId string
6 |
7 | resource keyVaultAccessPolicies 'Microsoft.KeyVault/vaults/accessPolicies@2022-07-01' = {
8 | parent: keyVault
9 | name: name
10 | properties: {
11 | accessPolicies: [ {
12 | objectId: principalId
13 | tenantId: subscription().tenantId
14 | permissions: permissions
15 | } ]
16 | }
17 | }
18 |
19 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = {
20 | name: keyVaultName
21 | }
22 |
--------------------------------------------------------------------------------
/infra/core/security/keyvault-secret.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param tags object = {}
3 | param keyVaultName string
4 | param contentType string = 'string'
5 | @description('The value of the secret. Provide only derived values like blob storage access, but do not hard code any secrets in your templates')
6 | @secure()
7 | param secretValue string
8 |
9 | param enabled bool = true
10 | param exp int = 0
11 | param nbf int = 0
12 |
13 | resource keyVaultSecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = {
14 | name: name
15 | tags: tags
16 | parent: keyVault
17 | properties: {
18 | attributes: {
19 | enabled: enabled
20 | exp: exp
21 | nbf: nbf
22 | }
23 | contentType: contentType
24 | value: secretValue
25 | }
26 | }
27 |
28 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = {
29 | name: keyVaultName
30 | }
31 |
--------------------------------------------------------------------------------
/infra/core/security/keyvault.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param principalId string = ''
6 |
7 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' = {
8 | name: name
9 | location: location
10 | tags: tags
11 | properties: {
12 | tenantId: subscription().tenantId
13 | sku: { family: 'A', name: 'standard' }
14 | accessPolicies: !empty(principalId) ? [
15 | {
16 | objectId: principalId
17 | permissions: { secrets: [ 'get', 'list' ] }
18 | tenantId: subscription().tenantId
19 | }
20 | ] : []
21 | }
22 | }
23 |
24 | output endpoint string = keyVault.properties.vaultUri
25 | output name string = keyVault.name
26 |
--------------------------------------------------------------------------------
/infra/core/security/registry-access.bicep:
--------------------------------------------------------------------------------
1 | param containerRegistryName string // name of an existing registry in the deployment's resource group
2 | param principalId string // principal that receives the role on the registry
3 |
4 | var acrPullRole = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') // full resource ID of the role definition assigned below
5 |
6 | resource aksAcrPull 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
7 | scope: containerRegistry // Use when specifying a scope that is different than the deployment scope
8 | name: guid(subscription().id, resourceGroup().id, principalId, acrPullRole) // deterministic: the same inputs always yield the same assignment name
9 | properties: {
10 | roleDefinitionId: acrPullRole
11 | principalType: 'ServicePrincipal'
12 | principalId: principalId
13 | }
14 | }
15 |
16 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' existing = { // reference only; this module does not create the registry
17 | name: containerRegistryName
18 | }
19 |
--------------------------------------------------------------------------------
/infra/core/security/role.bicep:
--------------------------------------------------------------------------------
1 | param principalId string // principal that receives the role assignment
2 |
3 | @allowed([
4 | 'Device'
5 | 'ForeignGroup'
6 | 'Group'
7 | 'ServicePrincipal'
8 | 'User'
9 | ])
10 | param principalType string = 'ServicePrincipal'
11 | param roleDefinitionId string // name (GUID) of the role definition to assign
12 |
13 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
14 | name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId) // deterministic: the same inputs always yield the same assignment name
15 | properties: {
16 | principalId: principalId
17 | principalType: principalType
18 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId) // role definitions are subscription-level resources, so build a subscription-scoped ID (resourceId() would add a resourceGroups segment)
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/infra/core/storage/storage-account.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | @allowed([
6 | 'Cool'
7 | 'Hot'
8 | 'Premium' ])
9 | param accessTier string = 'Hot'
10 | param allowBlobPublicAccess bool = true
11 | param allowCrossTenantReplication bool = true
12 | param allowSharedKeyAccess bool = true
13 | param containers array = []
14 | param defaultToOAuthAuthentication bool = false
15 | param deleteRetentionPolicy object = {}
16 | @allowed([ 'AzureDnsZone', 'Standard' ])
17 | param dnsEndpointType string = 'Standard'
18 | param kind string = 'StorageV2'
19 | param minimumTlsVersion string = 'TLS1_2'
20 | param networkAcls object = {
21 | bypass: 'AzureServices'
22 | defaultAction: 'Allow'
23 | }
24 | @allowed([ 'Enabled', 'Disabled' ])
25 | param publicNetworkAccess string = 'Enabled'
26 | param sku object = { name: 'Standard_LRS' }
27 |
28 | resource storage 'Microsoft.Storage/storageAccounts@2022-05-01' = {
29 | name: name
30 | location: location
31 | tags: tags
32 | kind: kind
33 | sku: sku
34 | properties: {
35 | accessTier: accessTier
36 | allowBlobPublicAccess: allowBlobPublicAccess
37 | allowCrossTenantReplication: allowCrossTenantReplication
38 | allowSharedKeyAccess: allowSharedKeyAccess
39 | defaultToOAuthAuthentication: defaultToOAuthAuthentication
40 | dnsEndpointType: dnsEndpointType
41 | minimumTlsVersion: minimumTlsVersion
42 | networkAcls: networkAcls
43 | publicNetworkAccess: publicNetworkAccess
44 | }
45 |
46 | resource blobServices 'blobServices' = if (!empty(containers)) {
47 | name: 'default'
48 | properties: {
49 | deleteRetentionPolicy: deleteRetentionPolicy
50 | }
51 | resource container 'containers' = [for container in containers: {
52 | name: container.name
53 | properties: {
54 | publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None'
55 | }
56 | }]
57 | }
58 | }
59 |
60 | output name string = storage.name
61 | output primaryEndpoints object = storage.properties.primaryEndpoints
62 |
--------------------------------------------------------------------------------
/infra/kmeans.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param identityName string
6 | param containerAppsEnvironmentName string
7 | param containerRegistryName string
8 | param serviceName string = 'kmeans'
9 | // Secure parameter: keeps the connection string (which may embed credentials) out of deployment history and logs.
10 | @secure()
11 | param azureSqlConnectionString string
12 | param exists bool
13 |
14 | // User-assigned managed identity handed to the container app module below.
15 | resource apiIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = {
16 | name: identityName
17 | location: location
18 | }
19 |
20 | module app 'core/host/container-app-upsert.bicep' = {
21 | name: '${serviceName}-container-app-module'
22 | params: {
23 | name: name
24 | location: location
25 | tags: union(tags, { 'azd-service-name': serviceName })
26 | identityName: apiIdentity.name
27 | exists: exists
28 | containerAppsEnvironmentName: containerAppsEnvironmentName
29 | containerRegistryName: containerRegistryName
30 | targetPort: 8000
31 | // NOTE(review): the connection string is still passed as a plain environment value; consider a container app secret if the upsert module supports one.
32 | env: [
33 | {
34 | name: 'MSSQL'
35 | value: azureSqlConnectionString
36 | }
37 | ]
38 | containerCpuCoreCount: '2'
39 | containerMemory: '4.0Gi'
40 | }
41 | }
42 |
43 | output SERVICE_API_IDENTITY_PRINCIPAL_ID string = apiIdentity.properties.principalId
44 | output SERVICE_API_NAME string = app.outputs.name
45 | output SERVICE_API_URI string = app.outputs.uri
46 | output SERVICE_API_IMAGE_NAME string = app.outputs.imageName
47 |
--------------------------------------------------------------------------------
/infra/kmeans.parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
3 | "contentVersion": "1.0.0.0",
4 | "parameters": {
5 | "location": {
6 | "value": "${AZURE_LOCATION}"
7 | },
8 | "name": {
9 | "value": "${SERVICE_API_NAME}"
10 | },
11 | "imageName": {
12 | "value": "${SERVICE_API_IMAGE_NAME}"
13 | },
14 | "containerAppsEnvironmentName": {
15 | "value": "${AZURE_CONTAINER_ENVIRONMENT_NAME}"
16 | },
17 | "containerRegistryName": {
18 | "value": "${AZURE_CONTAINER_REGISTRY_NAME}"
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/infra/main.bicep:
--------------------------------------------------------------------------------
1 | targetScope = 'subscription' // subscription scope: this template creates the resource group itself
2 |
3 | @minLength(1)
4 | @maxLength(64)
5 | @description('Name which is used to generate a short unique hash for each resource')
6 | param name string
7 |
8 | @minLength(1)
9 | @description('Primary location for all resources')
10 | param location string
11 |
12 | @secure()
13 | @description('Connection string to the Azure SQL database to use')
14 | param azureSqlConnectionString string
15 |
16 | param apiAppExists bool = false // forwarded to the kmeans module as `exists`
17 |
18 | var resourceToken = toLower(uniqueString(subscription().id, name, location)) // hash derived from subscription, name and location
19 | var tags = { 'azd-env-name': name } // applied to the resource group and passed to every module below
20 |
21 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
22 | name: '${name}-rg'
23 | location: location
24 | tags: tags
25 | }
26 |
27 | var prefix = '${name}-${resourceToken}' // common prefix for the resource names built below
28 |
29 | // Container apps host (including container registry)
30 | module containerApps 'core/host/container-apps.bicep' = {
31 | name: 'container-apps'
32 | scope: resourceGroup
33 | params: {
34 | name: 'app'
35 | location: location
36 | tags: tags
37 | containerAppsEnvironmentName: '${prefix}-containerapps-env'
38 | containerRegistryName: '${replace(prefix, '-', '')}registry' // hyphens stripped from the prefix for the registry name
39 | logAnalyticsWorkspaceName: logAnalyticsWorkspace.outputs.name // module is declared further down in this file
40 | }
41 | }
42 |
43 | // kmeans app
44 | module kmeans 'kmeans.bicep' = {
45 | name: 'kmeans'
46 | scope: resourceGroup
47 | params: {
48 | name: replace('${take(prefix,19)}-ca', '--', '-') // first 19 chars of prefix plus '-ca', collapsing a doubled hyphen; presumably to satisfy container app naming limits - TODO confirm
49 | location: location
50 | tags: tags
51 | identityName: '${prefix}-mi'
52 | containerAppsEnvironmentName: containerApps.outputs.environmentName
53 | containerRegistryName: containerApps.outputs.registryName
54 | exists: apiAppExists
55 | azureSqlConnectionString: azureSqlConnectionString
56 | }
57 | }
58 |
59 |
60 | module logAnalyticsWorkspace 'core/monitor/loganalytics.bicep' = { // workspace whose name is consumed by the containerApps module above
61 | name: 'loganalytics'
62 | scope: resourceGroup
63 | params: {
64 | name: '${prefix}-loganalytics'
65 | location: location
66 | tags: tags
67 | }
68 | }
69 |
70 | output AZURE_LOCATION string = location
71 | output AZURE_CONTAINER_ENVIRONMENT_NAME string = containerApps.outputs.environmentName
72 | output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName
73 | output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registryLoginServer
74 | output SERVICE_API_IDENTITY_PRINCIPAL_ID string = kmeans.outputs.SERVICE_API_IDENTITY_PRINCIPAL_ID
75 | output SERVICE_API_NAME string = kmeans.outputs.SERVICE_API_NAME
76 | output SERVICE_API_URI string = kmeans.outputs.SERVICE_API_URI
77 | output SERVICE_API_IMAGE_NAME string = kmeans.outputs.SERVICE_API_IMAGE_NAME
78 | output SERVICE_API_ENDPOINTS array = ['${kmeans.outputs.SERVICE_API_URI}/generate_name'] // NOTE(review): '/generate_name' is hard-coded; confirm the kmeans API actually exposes this route
79 |
--------------------------------------------------------------------------------
/infra/main.parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
3 | "contentVersion": "1.0.0.0",
4 | "parameters": {
5 | "name": {
6 | "value": "${AZURE_ENV_NAME}"
7 | },
8 | "location": {
9 | "value": "${AZURE_LOCATION}"
10 | },
11 | "apiAppExists": {
12 | "value": "${SERVICE_API_RESOURCE_EXISTS=false}"
13 | },
14 | "azureSqlConnectionString": {
15 | "value": "${MSSQL}"
16 | }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/sample-data/01-import-data.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Cleanup if needed
3 | */
4 | if not exists(select * from sys.symmetric_keys where [name] = '##MS_DatabaseMasterKey##')
5 | begin
6 | create master key encryption by password = 'Pa$$w0rd!'
7 | end
8 | go
9 | if exists(select * from sys.[external_data_sources] where name = 'openai_playground')
10 | begin
11 | drop external data source [openai_playground];
12 | end
13 | go
14 | if exists(select * from sys.[database_scoped_credentials] where name = 'openai_playground')
15 | begin
16 | drop database scoped credential [openai_playground];
17 | end
18 | go
19 |
20 | /*
21 | Create database scoped credential and external data source.
22 | File is assumed to be in a path like:
23 | https://<account>.blob.core.windows.net/playground/wikipedia/vector_database_wikipedia_articles_embedded.csv
24 |
25 | Please note that it is recommended to avoid using SAS tokens: the best practice is to use Managed Identity as described here:
26 | https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-bulk-data-by-using-bulk-insert-or-openrowset-bulk-sql-server?view=sql-server-ver16#bulk-importing-from-azure-blob-storage
27 | */
28 | create database scoped credential [openai_playground]
29 | with identity = 'SHARED ACCESS SIGNATURE',
30 | secret = '<sas-token>'; -- replace <sas-token> with your SAS token; make sure not to include the ? at the beginning
31 | go
32 | create external data source [openai_playground]
33 | with
34 | (
35 | type = blob_storage,
36 | location = 'https://<account>.blob.core.windows.net/playground', -- replace <account> with your storage account name
37 | credential = [openai_playground]
38 | );
39 | go
40 |
41 | /*
42 | Create table
43 | */
44 | drop table if exists [dbo].[wikipedia_articles_embeddings];
45 | create table [dbo].[wikipedia_articles_embeddings]
46 | (
47 | [id] [int] not null,
48 | [url] [varchar](1000) not null,
49 | [title] [varchar](1000) not null,
50 | [text] [varchar](max) not null,
51 | [title_vector] [varchar](max) not null,
52 | [content_vector] [varchar](max) not null,
53 | [vector_id] [int] not null
54 | )
55 | go
56 |
57 | /*
58 | Import data
59 | */
60 | bulk insert dbo.[wikipedia_articles_embeddings]
61 | from 'wikipedia/vector_database_wikipedia_articles_embedded.csv'
62 | with (
63 | data_source = 'openai_playground',
64 | format = 'csv',
65 | firstrow = 2,
66 | codepage = '65001',
67 | fieldterminator = ',',
68 | rowterminator = '0x0a',
69 | fieldquote = '"',
70 | batchsize = 1000,
71 | tablock
72 | )
73 | go
74 |
75 | /*
76 | Verify data
77 | */
78 | select top (100) * from [dbo].[wikipedia_articles_embeddings]
79 | go
80 |
81 | select * from [dbo].[wikipedia_articles_embeddings] where title = 'Alan Turing'
82 | go
--------------------------------------------------------------------------------
/sample-data/02-use-native-vectors.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Add columns to store the native vectors
3 | */
4 | alter table wikipedia_articles_embeddings
5 | add title_vector_ada2 vector(1536);
6 |
7 | alter table wikipedia_articles_embeddings
8 | add content_vector_ada2 vector(1536);
9 | go
10 |
11 | /*
12 | Update the native vectors
13 | */
14 | update
15 | wikipedia_articles_embeddings
16 | set
17 | title_vector_ada2 = cast(title_vector as vector(1536)),
18 | content_vector_ada2 = cast(content_vector as vector(1536))
19 | go
20 |
21 | /*
22 | Remove old columns
23 | */
24 | alter table wikipedia_articles_embeddings
25 | drop column title_vector;
26 | go
27 |
28 | alter table wikipedia_articles_embeddings
29 | drop column content_vector;
30 | go
31 |
32 | /*
33 | Add primary key
34 | */
35 | alter table [dbo].[wikipedia_articles_embeddings]
36 | add constraint pk__wikipedia_articles_embeddings primary key clustered (id)
37 | go
38 |
39 | /*
40 | Add index on title
41 | */
42 | create index [ix_title] on [dbo].[wikipedia_articles_embeddings](title)
43 | go
44 |
45 | /*
46 | Verify data
47 | */
48 | select top (100) * from [dbo].[wikipedia_articles_embeddings]
49 | go
50 |
51 | select * from [dbo].[wikipedia_articles_embeddings] where title = 'Alan Turing'
52 | go
53 |
--------------------------------------------------------------------------------
/sample-data/readme.md:
--------------------------------------------------------------------------------
1 | # Wikipedia Sample Dataset
2 |
3 | ## Download dataset
4 |
5 | Download the wikipedia embeddings from here: https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip and unzip it in the `/sample-data` folder.
6 |
7 | In Windows with powershell:
8 |
9 | ```powershell
10 | Invoke-WebRequest -Uri "https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip" -OutFile "vector_database_wikipedia_articles_embedded.zip"
11 | ```
12 |
13 | or on Linux/MacOS with wget
14 |
15 | ```bash
16 | wget https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip
17 | ```
18 |
19 | Then unzip its content into the `/sample-data` folder.
20 |
21 | In Windows with powershell:
22 |
23 | ```powershell
24 | Expand-Archive .\vector_database_wikipedia_articles_embedded.zip .
25 | ```
26 |
27 | or on Linux/MacOS with unzip:
28 |
29 | ```bash
30 | unzip ./vector_database_wikipedia_articles_embedded.zip
31 | ```
32 |
33 | ## Import dataset into Azure SQL
34 |
35 | Upload the `vector_database_wikipedia_articles_embedded.csv` file (using [Azure Storage Explorer](https://learn.microsoft.com/azure/vs-azure-tools-storage-manage-with-storage-explorer?tabs=windows) for example) to an Azure Blob Storage container.
36 |
37 | For this example, the unzipped CSV file `vector_database_wikipedia_articles_embedded.csv` is assumed to be uploaded to a blob container named `playground`, in a folder named `wikipedia`.
38 |
39 | Once the file is uploaded, get the [SAS token](https://learn.microsoft.com/azure/storage/common/storage-sas-overview) to allow Azure SQL database to access it. (From Azure Storage Explorer, right click on the `playground` container and then select `Get Shared Access Signature`. Set the expiration date to some time in the future and then click on "Create". Copy the generated query string somewhere, for example into Notepad, as it will be needed later.)
40 |
41 | Use a client tool like [Azure Data Studio](https://azure.microsoft.com/products/data-studio/) to connect to an Azure SQL database and then use the `01-import-data.sql` script to create the `wikipedia_articles_embeddings` table, into which the uploaded CSV file will be imported.
42 |
43 | Make sure to replace the `<account>` and `<sas-token>` placeholders with the correct values for your environment:
44 |
45 | - `<account>` is the name of the storage account where the CSV file has been uploaded
46 | - `<sas-token>` is the Shared Access Signature obtained before
47 |
48 | Run each section (each section starts with a comment) separately. At the end of the process (will take up to a couple of minutes) you will have all the CSV data imported in the `wikipedia_articles_embeddings` table.
49 |
50 | ## Convert the existing vector embedding into a native binary format
51 |
52 | The embeddings are available in a JSON array and they can be converted into the native binary format using the script `02-use-native-vectors.sql`
--------------------------------------------------------------------------------
/src/.dockerignore:
--------------------------------------------------------------------------------
1 | .devcontainer/
2 | .azure/
3 | .github/
4 | __pycache__/
5 | infra/
6 | sample-data/
7 |
--------------------------------------------------------------------------------
/src/.env.sample:
--------------------------------------------------------------------------------
1 | MSSQL='Driver={ODBC Driver 18 for SQL Server};Server=<server>.database.windows.net;Database=vectordb;Uid=vectordb_user;Pwd=rANd0m_PAzzw0rd!;Connection Timeout=30;'
2 |
--------------------------------------------------------------------------------
/src/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3:24.1.2-0
2 |
3 | # apt-get and system utilities
4 | RUN apt-get update && apt-get install -y \
5 | curl apt-transport-https debconf-utils gnupg2
6 |
7 | # adding custom MS repository
8 | RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
9 | RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
10 |
11 | # install AZ CLI
12 | RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash
13 |
14 | # install SQL Server drivers and tools
15 | RUN apt-get update && ACCEPT_EULA=Y apt-get install -y msodbcsql18 mssql-tools18
16 | # ENV persists into later build steps and the running container; a PATH export in ~/.bashrc is not seen by RUN or CMD
17 | ENV PATH="$PATH:/opt/mssql-tools18/bin"
18 | RUN apt-get -y install locales \
19 | && rm -rf /var/lib/apt/lists/*
20 | RUN locale-gen en_US.UTF-8
21 | RUN localedef -i en_US -f UTF-8 en_US.UTF-8
22 |
23 | # install packages
24 | RUN conda install -c conda-forge \
25 | scikit-learn \
26 | pyodbc \
27 | python-multipart python-dotenv \
28 | uvicorn[standard] \
29 | fastapi apscheduler \
30 | azure-identity \
31 | -y
32 |
33 | COPY . .
34 |
35 | EXPOSE 8000
36 |
37 | CMD ["uvicorn", "main:api", "--host", "0.0.0.0", "--port", "8000"]
38 |
--------------------------------------------------------------------------------
/src/db/database.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import pyodbc
4 | import logging
5 | import json
6 | from .utils import Buffer, VectorSet, NpEncoder, DataSourceConfig
7 | import struct
8 | import logging
9 | from azure import identity
10 | from azure.core import credentials
11 |
12 | _logger = logging.getLogger("uvicorn")
13 |
14 | class DatabaseEngineException(Exception): # error type raised by DatabaseEngine, e.g. when an index already exists and creation was not forced
15 | pass
16 |
17 | class DatabaseEngine:
18 | def __init__(self) -> None: # starts with no index selected and no cached access token
19 | self._index_id = None # id of the [$vector].[kmeans] row being worked on; assigned once index metadata is registered
20 | self._token:credentials.AccessToken = None # cached access token for token-based connections; reset and re-acquired when expired
21 |
22 | def __get_mssql_connection(self):
23 | _logger.debug('Connecting to MSSQL...')
24 |
25 | mssql_connection_string = os.environ["MSSQL"]
26 |
27 | if any(s in mssql_connection_string.lower() for s in ["uid"]):
28 | _logger.debug('Using SQL Server authentication')
29 | attrs_before = None
30 | else:
31 | if (self._token != None and self._token.expires_on < datetime.datetime.now().timestamp()):
32 | _logger.info('Token expired. Refresh is needed.')
33 | self._token = None
34 |
35 | if (self._token == None):
36 | _logger.info('Getting EntraID credentials...')
37 | mssql_connection_string = os.environ["MSSQL"]
38 | credential = identity.DefaultAzureCredential(exclude_interactive_browser_credential=False)
39 | self._token = credential.get_token("https://database.windows.net/.default")
40 |
41 | token_bytes = self._token.token.encode("UTF-16-LE")
42 | token_struct = struct.pack(f' int:
184 | id = None
185 | conn = self.__get_mssql_connection()
186 |
187 | try:
188 | cursor = conn.cursor()
189 |
190 | id = cursor.execute("""
191 | select id from [$vector].[kmeans] where [source_table_name] = ? and [vector_column_name] = ?;
192 | """,
193 | self._source_table_fqname,
194 | self._source_vector_column_name
195 | ).fetchval()
196 | if (id != None):
197 | if (force == False):
198 | raise DatabaseEngineException(f"Index for {self._source_table_fqname}.{self._source_vector_column_name} already exists.")
199 | else:
200 | _logger.info(f"Index creation forced over existing index {id}...")
201 |
202 | if (id == None):
203 | _logger.info(f"Registering new index...")
204 | id = cursor.execute("""
205 | set nocount on;
206 | insert into [$vector].[kmeans]
207 | ([source_table_name], [id_column_name], [vector_column_name], [dimensions_count], [status], [updated_on])
208 | values
209 | (?, ?, ?, ?, 'INITIALIZING', sysdatetime());
210 | select scope_identity() as id;
211 | """,
212 | self._source_table_fqname,
213 | self._source_id_column_name,
214 | self._source_vector_column_name,
215 | self._vector_dimensions
216 | ).fetchval()
217 | else:
218 | _logger.info(f"Updating existing index...")
219 | cursor.execute("""
220 | update
221 | [$vector].[kmeans]
222 | set
223 | [status] = 'INITIALIZING',
224 | [item_count] = null,
225 | [updated_on] = sysdatetime()
226 | where
227 | id = ?;
228 | """,
229 | id
230 | )
231 |
232 | cursor.commit()
233 | finally:
234 | conn.close()
235 |
236 | self._index_id = id
237 | return id
238 |
def update_index_metadata(self, status:str):
    """Store a new value in the [status] column of this index's metadata row.

    The row in [$vector].[kmeans] is selected by ``self._index_id``.

    Args:
        status: Status label to write (for example "LOADING_DATA").
    """
    conn = self.__get_mssql_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                update 
                    [$vector].[kmeans] 
                set 
                    [status] = ?
                where 
                    id = ?;""",
                status,
                self._index_id,
            )
            conn.commit()
        finally:
            # Release the cursor even when execute/commit raises.
            cursor.close()
    finally:
        # Always hand the connection back, otherwise a failed update leaks it.
        conn.close()
257 |
def finalize_index_metadata(self, vectors_count:int):
    """Mark this index's metadata row as 'CREATED' and record its final counts.

    Writes [item_count], [dimensions_count] (from ``self._vector_dimensions``),
    sets [status] to 'CREATED' and refreshes [updated_on] for the row whose id
    is ``self._index_id``.

    Args:
        vectors_count: Number of items to store in [item_count].
    """
    conn = self.__get_mssql_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                update 
                    [$vector].[kmeans] 
                set 
                    [item_count] = ?, 
                    [dimensions_count] = ?,
                    [status] = 'CREATED',
                    [updated_on] = sysdatetime()
                where 
                    id = ?;""",
                vectors_count,
                self._vector_dimensions,
                self._index_id,
            )
            conn.commit()
        finally:
            # Release the cursor even when execute/commit raises.
            cursor.close()
    finally:
        # Always close the connection so a failed update does not leak it.
        conn.close()
280 |
def load_vectors_from_db(self, batch_size:int=50000):
    """Read every id/vector pair from the source table into memory.

    The vector column is cast to varchar(max) by the query and parsed with
    ``json.loads``. Rows are fetched in batches and appended to a VectorSet;
    progress and memory footprint are logged after each batch.

    Args:
        batch_size: Number of rows requested per ``fetchmany`` call.

    Returns:
        A ``(ids, vectors)`` tuple taken from the accumulated VectorSet.
    """
    query = f"""
        select {self._source_id_column_name} as item_id, cast({self._source_vector_column_name} as varchar(max)) as vector from {self._source_table_fqname}
    """
    buffer = Buffer()
    result = VectorSet(self._vector_dimensions)
    total_rows = 0

    conn = self.__get_mssql_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            while True:
                rows = cursor.fetchmany(batch_size)
                if not rows:
                    break

                buffer.clear()
                for row in rows:
                    buffer.add(row.item_id, json.loads(row.vector))

                result.add(buffer)
                batch_rows = len(rows)
                total_rows += batch_rows

                mf = int(result.get_memory_usage() / 1024 / 1024)
                _logger.info("Loaded {0} rows, total rows {1}, total memory footprint {2} MB".format(batch_rows, total_rows, mf))
        finally:
            # Release the cursor even if fetching or JSON parsing fails.
            cursor.close()
        conn.commit()
    finally:
        # Never leave the connection open when loading is interrupted.
        conn.close()

    return result.ids, result.vectors
310 |
def save_clusters_centroids(self, centroids):
    """Persist the cluster centroids, replacing any previously saved set.

    Each centroid is serialized to JSON (via NpEncoder) and inserted into the
    temporary centroids table with its positional index as ``cluster_id``.
    The final table is then dropped and the temporary table is transferred
    into the [$vector] schema inside one transaction.

    Args:
        centroids: Indexable sequence of centroids; element ``i`` is stored
            with ``cluster_id = i``.
    """
    params = [(cluster_id, json.dumps(centroid, cls=NpEncoder)) for cluster_id, centroid in enumerate(centroids)]

    conn = self.__get_mssql_connection()
    try:
        # A single cursor is enough; the original opened two and leaked the first.
        cursor = conn.cursor()
        try:
            #cursor.fast_executemany = True
            _logger.info(f"Saving centroids to {self._clusters_centroids_table_fqname}...")
            cursor.execute(f"""
                if object_id('{self._clusters_centroids_table_fqname}') is null begin
                    create table {self._clusters_centroids_table_fqname}
                    (
                        cluster_id int not null primary key clustered,
                        centroid vector({self._vector_dimensions}) not null
                    )
                end
                drop table if exists {self._clusters_centroids_tmp_table_fqname}
                create table {self._clusters_centroids_tmp_table_fqname}
                (
                    cluster_id int not null primary key clustered,
                    centroid vector({self._vector_dimensions}) not null
                )
            """)
            cursor.commit()
            cursor.executemany(f"""
                insert into {self._clusters_centroids_tmp_table_fqname} (cluster_id, centroid) values (?, cast(? as vector({self._vector_dimensions})))
                """,
                params)
            cursor.commit()

            _logger.info("Switching to final centroids table...")
            cursor.execute(f"""
                begin tran;
                    drop table if exists {self._clusters_centroids_table_fqname};
                    alter schema [$vector] transfer {self._clusters_centroids_tmp_table_fqname};
                commit tran;
            """)
            cursor.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection on every path, including failures mid-save.
        conn.close()

    _logger.info("Centroids saved.")
354 |
def save_clusters_items(self, ids, labels):
    """Persist the item-to-cluster assignments, replacing any previous set.

    Pairs are bulk-inserted into the temporary clusters table, a clustered
    index on (cluster_id, item_id) is created, and the temporary table then
    replaces the final one via drop + ``alter schema [$vector] transfer``.

    Args:
        ids: Item identifiers; each is converted with ``int()``.
        labels: Cluster label for the item at the same position in ``ids``.
    """
    # Pair positionally. The previous implementation sized this list from a
    # dict keyed by id, which silently dropped trailing rows on duplicate ids.
    params = [(int(item_id), int(label)) for item_id, label in zip(ids, labels)]

    conn = self.__get_mssql_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.fast_executemany = True

            _logger.info(f"Saving centroids elements into {self._clusters_table_fqname}...")
            # NOTE(review): the final table is dropped up front, so it is empty
            # until the switch below completes - confirm this is intended.
            cursor.execute(f"drop table if exists {self._clusters_table_fqname}")
            cursor.execute(f"""
                if object_id('{self._clusters_table_fqname}') is null begin
                    create table {self._clusters_table_fqname} (
                        cluster_id int not null,
                        item_id int not null
                    )
                end
                drop table if exists {self._clusters_tmp_table_fqname}
                create table {self._clusters_tmp_table_fqname}
                (
                    cluster_id int not null,
                    item_id int not null
                )
            """)
            cursor.executemany(f"insert into {self._clusters_tmp_table_fqname} (item_id, cluster_id) values (?, ?)", params)
            cursor.commit()

            _logger.info("Creating index...")
            cursor.execute(f"create clustered index ixc on {self._clusters_tmp_table_fqname} (cluster_id, item_id)")
            cursor.commit()

            _logger.info("Switching to final centroids elements table...")
            cursor.execute(f"""
                drop table if exists {self._clusters_table_fqname};
                alter schema [$vector] transfer {self._clusters_tmp_table_fqname};
            """)
            cursor.commit()
        finally:
            cursor.close()
        conn.commit()
    finally:
        # The original never closed this connection.
        conn.close()

    _logger.info("Centroids elements saved.")
396 |
def create_similarity_function(self):
    """Create (or alter) the inline table-valued similarity search function.

    The generated T-SQL function takes ``@v`` (query vector), ``@k``, ``@p``
    and ``@d``. It picks the ``@p`` centroids closest to ``@v`` by cosine
    distance, joins their cluster items back to the source table, keeps rows
    whose cosine distance to ``@v`` is at most ``@d`` and returns the top
    ``@k`` of them ordered by distance, exposed as ``[$distance]``.
    """
    conn = self.__get_mssql_connection()
    try:
        _logger.info(f"Creating function {self._function_fqname}...")
        # A single cursor is enough; the original opened two and leaked the first.
        cursor = conn.cursor()
        try:
            # Join on the configured id column rather than a hard-coded `id`,
            # so source tables with a differently named key work too.
            cursor.execute(f"""
            create or alter function {self._function_fqname} (@v vector({self._vector_dimensions}), @k int, @p int, @d float)
            returns table
            as return
            with cteProbes as
            (
                select top (@p)
                    k.cluster_id
                from 
                    {self._clusters_centroids_table_fqname} k
                order by
                    vector_distance('cosine', k.[centroid], @v)
            )
            select top(@k)
                v.*,
                [$distance] = vector_distance('cosine', v.{self._source_vector_column_name}, @v) 
            from 
                cteProbes k
            inner join 
                {self._clusters_table_fqname} c on k.cluster_id = c.cluster_id
            inner join 
                {self._source_table_fqname} v on v.{self._source_id_column_name} = c.item_id
            where 
                vector_distance('cosine', v.{self._source_vector_column_name}, @v) <= @d
            order by 
                [$distance]
            """)
        finally:
            cursor.close()
        conn.commit()
    finally:
        # The original never closed this connection.
        conn.close()
    _logger.info(f"Function created.")
433 |
434 |
--------------------------------------------------------------------------------
/src/db/index.py:
--------------------------------------------------------------------------------
class BaseIndex:
    """Minimal base class for index objects: holds an ``id`` and a no-op ``build``."""

    def __init__(self) -> None:
        # Starts unset; nothing in this class assigns a real id.
        self.id: int = None

    def build(self):
        """Do nothing and return None; subclasses provide the real build."""
        return None
7 |
class NoIndex(BaseIndex):
    """Index type that adds nothing to BaseIndex (its ``id`` stays None)."""

    def __init__(self) -> None:
        # Only the base initialisation is needed.
        BaseIndex.__init__(self)
11 |
--------------------------------------------------------------------------------
/src/db/kmeans.py:
--------------------------------------------------------------------------------
1 | import math
2 | import logging
3 | import numpy as np
4 | from .index import BaseIndex
5 | from .database import DatabaseEngine, DatabaseEngineException
6 | from .utils import DataSourceConfig
7 | from sklearn.cluster import MiniBatchKMeans
8 | from sklearn.preprocessing import normalize
9 |
10 | _logger = logging.getLogger("uvicorn")
11 |
class KMeansIndexIdMap:
    """Holds a k-means model together with the ids and the size of its input.

    Attributes:
        ids: Item ids passed in by the caller.
        model: The MiniBatchKMeans instance passed in by the caller.
        vectors_count: Value of the ``vector_count`` argument.
        dimensions_count: Value of the ``dimensions_count`` argument.
    """
    ids: np.array
    model: MiniBatchKMeans

    def __init__(self, ids:np.array, model:MiniBatchKMeans, vector_count:int, dimensions_count:int) -> None:
        self.vectors_count:int = vector_count
        self.dimensions_count:int = dimensions_count
        self.model = model
        self.ids = ids
21 |
class KMeansIndex(BaseIndex):
    """Index that clusters source vectors with k-means and stores the result.

    The heavy lifting against the database is delegated to a DatabaseEngine
    held in ``_db``. Create instances through ``from_config`` or ``from_id``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.index = None
        self._db:DatabaseEngine = None

    @staticmethod
    def from_config(config:DataSourceConfig):
        """Return a KMeansIndex whose engine is built from ``config``."""
        index = KMeansIndex()
        index._db = DatabaseEngine.from_config(config)
        return index

    @staticmethod
    def from_id(id:int):
        """Return a KMeansIndex whose engine is built from an index id."""
        index = KMeansIndex()
        index._db = DatabaseEngine.from_id(id)
        return index

    @staticmethod
    def _cluster_count(vector_count:int) -> int:
        """Choose the number of clusters for ``vector_count`` vectors.

        Above one million vectors this is sqrt(n); otherwise two clusters per
        thousand vectors, but never fewer than one so that small tables
        (under 1000 rows) do not ask k-means for zero clusters.
        """
        if vector_count > 1000000:
            return int(math.sqrt(vector_count))
        return max(1, int(vector_count / 1000) * 2)

    def initialize_build(self, force: bool)->int:
        """Initialize the engine and register the index metadata.

        Args:
            force: Passed through to ``create_index_metadata``.

        Returns:
            The index id, which is also stored in ``self.id``.

        Raises:
            Exception: When the engine raises DatabaseEngineException; the
                original error is attached as the cause.
        """
        try:
            self._db.initialize()
            id = self._db.create_index_metadata(force)
            self.id = id
            _logger.info(f"Index has id {id}.")
        except DatabaseEngineException as e:
            raise Exception(f"Error initializing index: {str(e)}") from e
        return id

    def build(self):
        """Load vectors, fit k-means, and persist centroids, items and function.

        The status in the index metadata is updated before each phase. On any
        failure the status is set to "ERROR_DURING_CREATION" and the original
        exception is re-raised.

        Raises:
            Exception: If the index has not been initialized, if no vectors
                were loaded, or whatever a build phase raised.
        """
        if self.id is None:
            raise Exception("Index has not been initialized.")

        try:
            self.index = None

            _logger.info(f"Starting creating IVFFLAT index...")

            _logger.info("Loading data...")
            self._db.update_index_metadata("LOADING_DATA")
            ids, vectors = self._db.load_vectors_from_db()
            _logger.info("Done loading data...")

            _logger.info("Creating kmeans model...")
            self._db.update_index_metadata("KMEANS_CLUSTERING")
            nvp = np.asarray(vectors)
            vector_count:int = np.shape(nvp)[0]
            dimensions_count:int = np.shape(nvp)[1]
            if vector_count == 0:
                # Fail with a clear message instead of an opaque clustering error.
                raise Exception("No vectors found in the source table.")
            clusters = KMeansIndex._cluster_count(vector_count)
            _logger.info(f"Determining {clusters} clusters...")
            kmeans = MiniBatchKMeans(init="k-means++", n_clusters=clusters, n_init=10, random_state=0)
            kmeans.fit(nvp)
            self.index = KMeansIndexIdMap(ids, kmeans, vector_count, dimensions_count)

            _logger.info(f"Done creating kmeans model ({type(kmeans)}).")

            _logger.info(f"Saving centroids index #{self.id}...")
            self._db.update_index_metadata("SAVING_CENTROIDS")
            centroids = self.index.model.cluster_centers_
            nc = normalize(centroids)
            self._db.save_clusters_centroids(nc)
            _logger.info(f"Done saving centroids index #{self.id}...")

            _logger.info(f"Saving centroids elements ({len(ids)}) index #{self.id}...")
            self._db.update_index_metadata("SAVING_CENTROIDS_ELEMENTS")
            ids = self.index.ids
            labels = self.index.model.labels_
            self._db.save_clusters_items(ids, labels)
            _logger.info(f"Done saving centroids elements index #{self.id}...")

            _logger.info(f"Creating similarity function...")
            self._db.update_index_metadata("CREATING_SIMILARITY_FUNCTION")
            self._db.create_similarity_function()
            _logger.info(f"Done creating similarity function.")

            _logger.info(f"Finalizing index #{self.id} metadata...")
            self._db.finalize_index_metadata(self.index.vectors_count)
            _logger.info(f"Done finalizing metadata.")

            _logger.info(f"IVFFLAT Index #{self.id} created.")
        except Exception:
            # Recording the failure must not hide the error that caused it.
            try:
                self._db.update_index_metadata("ERROR_DURING_CREATION")
            except Exception as status_error:
                _logger.error(f"Unable to record index error status: {status_error}")
            raise
--------------------------------------------------------------------------------
/src/db/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | from enum import StrEnum, Enum
4 |
class DataSourceConfig:
    """Plain settings holder describing the source table and its vector column.

    Only annotations are declared here; callers assign the values on an
    instance after creating it.
    """
    source_table_schema: str
    source_table_name: str
    source_id_column_name: str
    source_vector_column_name: str
    vector_dimensions: int
11 |
12 | class IndexStatus(StrEnum):
13 | INITIALIZING = 'initializing'
14 | NOT_READY = 'not ready'
15 | LOADING = 'loading'
16 | TRAINED = 'trained'
17 | READY = 'ready'
18 | CREATING = 'creating'
19 | TRAINING = 'training'
20 | NOINDEX = 'noindex'
21 |
22 | class IndexSubStatus(StrEnum):
23 | NONE = 'none'
24 | READY = 'ready'
25 | SAVING = 'saving'
26 |
27 | class UpdateResult(Enum):
28 | DONE = 0
29 | NO_CHANGES = 1
30 | INDEX_NOT_READY = 2
31 | INDEX_IS_STALE = 3
32 | UNKNOWN = -1
33 |
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native types.

    Use as ``json.dumps(value, cls=NpEncoder)``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy booleans become ``bool``, any NumPy integer becomes ``int``,
        any NumPy floating value becomes ``float`` and arrays become nested
        lists. Everything else is deferred to the base class, which raises
        TypeError.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        # np.integer / np.floating already cover int32, int64, float32, ...,
        # so the per-width checks the original carried were redundant.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
49 |
class Buffer:
    """Reusable batch of ids and vectors kept as two parallel lists."""

    def __init__(self):
        self.ids, self.vectors = [], []

    def add(self, id, vector):
        """Append one id and its vector to the batch."""
        self.ids += [id]
        self.vectors += [vector]

    def clear(self):
        """Empty both lists in place (the list objects are kept)."""
        del self.ids[:]
        del self.vectors[:]
62 |
class VectorSet:
    """Accumulates ids and vectors from successive buffers into NumPy arrays."""

    def __init__(self, vector_dimensions:int):
        # Start with zero rows; ``vectors`` has one column per dimension.
        self.ids = np.empty((0), dtype=np.int32)
        self.vectors = np.empty((0, vector_dimensions), dtype=np.float32)

    def add(self, buffer:Buffer):
        """Append the buffer's ids and vectors along axis 0."""
        incoming_ids = np.asarray(buffer.ids)
        incoming_vectors = np.asarray(buffer.vectors, dtype=np.float32)
        self.ids = np.concatenate((self.ids, incoming_ids), axis=0)
        self.vectors = np.concatenate((self.vectors, incoming_vectors), axis=0)

    def get_memory_usage(self):
        """Return the combined size in bytes of both arrays."""
        return sum(arr.nbytes for arr in (self.ids, self.vectors))
74 |
75 |
--------------------------------------------------------------------------------
/src/internals.py:
--------------------------------------------------------------------------------
1 | from db.index import NoIndex
2 | from pydantic import BaseModel, Field
3 |
class TableInfo(BaseModel):
    """Source table reference; populated from the "schema" and "name" aliases."""

    table_schema: str = Field(alias="schema")
    table_name: str = Field(alias="name")
7 |
class ColumnInfo(BaseModel):
    """Names of the id column and of the vector column."""

    id: str
    vector: str
11 |
class VectorInfo(BaseModel):
    """Vector description: the number of dimensions."""

    dimensions: int
14 |
class IndexRequest(BaseModel):
    """Request model combining table, column and vector information."""

    table: TableInfo
    column: ColumnInfo
    vector: VectorInfo
19 |
class State:
    """Tracks the index currently held and the current/previous status text."""

    def __init__(self) -> None:
        self.index = NoIndex()
        self.current_status = "idle"
        self.last_status = "idle"

    def set_status(self, status:str):
        """Make ``status`` current and remember the previous one as last."""
        self.last_status = self.current_status
        self.current_status = status

    def get_status(self)->dict:
        """Return the current/last status and the index id as a dict.

        The annotation was previously ``str`` although a dict is returned.
        """
        return {
            "status": {
                "current": self.current_status,
                "last": self.last_status
            },
            "index_id": self.index.id
        }

    def clear(self):
        """Go back to "idle" (keeping the previous status as last) and drop the index."""
        self.last_status = self.current_status
        self.current_status = "idle"
        self.index = NoIndex()
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import json
4 |
5 | from dotenv import load_dotenv
6 | from fastapi import BackgroundTasks, FastAPI, Response, HTTPException
7 | from contextlib import asynccontextmanager
8 |
9 | from db.index import NoIndex
10 | from db.kmeans import KMeansIndex
11 | from db.utils import DataSourceConfig
12 | from internals import IndexRequest, State
13 |
14 | load_dotenv()
15 |
16 | api_version = "0.0.2"
17 |
18 | _logger = logging.getLogger("uvicorn")
19 |
20 | state = State()
21 |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log on entry, then on exit log again and reset the shared state."""
    _logger.info("Starting API...")
    yield
    # Everything below runs once the context manager is exited.
    _logger.info("Closing API...")
    state.clear()
28 |
29 | api = FastAPI(lifespan=lifespan)
30 |
@api.get("/")
def welcome():
    """Return the server status together with the API version."""
    server_status = state.get_status()
    return {"server": server_status, "version": api_version}
37 |
@api.post("/kmeans/build")
def build(tasks: BackgroundTasks, indexRequest: IndexRequest, force: bool = False):
    """Initialize a new index from the request and schedule its build.

    Responds with HTTP 500 when another index is already held in ``state`` or
    when initialization fails; otherwise queues ``_internal_build`` as a
    background task and answers 202 with the current status as JSON.
    """
    if not isinstance(state.index, NoIndex):
        raise HTTPException(detail=f"An index (#{state.index.id}) is already being built.", status_code=500)

    # Copy the request fields onto the engine configuration object.
    table, column = indexRequest.table, indexRequest.column
    config = DataSourceConfig()
    config.source_table_schema = table.table_schema
    config.source_table_name = table.table_name
    config.source_id_column_name = column.id
    config.source_vector_column_name = column.vector
    config.vector_dimensions = indexRequest.vector.dimensions

    try:
        state.set_status("initializing")
        state.index = KMeansIndex.from_config(config)
        state.index.initialize_build(force)
    except Exception as e:
        _logger.error(f"Error during initialization: {e}")
        state.set_status("error during initialization: " + str(e))
        state.clear()
        raise HTTPException(detail=str(e), status_code=500)

    tasks.add_task(_internal_build)

    body = json.dumps(state.get_status(), default=str)
    return Response(content=body, status_code=202, media_type='application/json')
66 |
@api.post("/kmeans/rebuild/{index_id}")
def rebuild(tasks: BackgroundTasks, index_id: int):
    """Re-initialize an existing index by id and schedule its rebuild.

    Responds with HTTP 500 when another index is already held in ``state`` or
    when initialization fails; otherwise queues ``_internal_build`` as a
    background task and answers 202 with the current status as JSON.
    Initialization is always forced (``force=True``).
    """
    if not isinstance(state.index, NoIndex):
        raise HTTPException(detail=f"An index (#{state.index.id}) is already being built.", status_code=500)

    try:
        state.index = KMeansIndex.from_id(index_id)
        state.set_status("initializing")
        # The returned id was bound to an unused local that shadowed the
        # builtin `id`; the index object keeps the id itself.
        state.index.initialize_build(force=True)
    except Exception as e:
        _logger.error(f"Error during initialization: {e}")
        state.set_status("error during initialization: " + str(e))
        state.clear()
        raise HTTPException(detail=str(e), status_code=500)

    tasks.add_task(_internal_build)

    r = state.get_status()
    j = json.dumps(r, default=str)

    return Response(content=j, status_code=202, media_type='application/json')
88 |
def _internal_build():
    """Run the build of the index held in ``state``, then always reset the state.

    Failures are logged and recorded in the status; they are not re-raised.
    """
    try:
        state.set_status("building")
        state.index.build()
    except Exception as build_error:
        _logger.error(f"Error building index: {build_error}")
        state.set_status("error during index build: " + str(build_error))
    finally:
        # Free the slot whether the build succeeded or failed.
        state.clear()
--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn
2 | pyodbc
3 | uvicorn[standard]
4 | python-multipart
5 | python-dotenv
6 | fastapi
7 | apscheduler
8 | azure-identity
--------------------------------------------------------------------------------
/src/sql/00-create-user.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

/*
    Create the [vectordb_user] principal used by the python script and
    add it to the db_owner role.
    NOTE(review): the password below is a sample value - change it before use.
*/
if (serverproperty('Edition') = 'SQL Azure')
begin

    -- Azure SQL: database user with its own password (no server login)
    if not exists (
        select * from sys.database_principals
        where [type] in ('E', 'S') and [name] = 'vectordb_user'
    )
    begin
        create user [vectordb_user] with password = 'rANd0m_PAzzw0rd!'
    end

    alter role db_owner add member [vectordb_user]

end
else
begin

    -- Other editions: server login first, then a database user mapped to it
    if not exists (
        select * from sys.server_principals
        where [type] in ('E', 'S') and [name] = 'vectordb_user'
    )
    begin
        create login [vectordb_user] with password = 'rANd0m_PAzzw0rd!'
    end

    if not exists (
        select * from sys.database_principals
        where [type] in ('E', 'S') and [name] = 'vectordb_user'
    )
    begin
        create user [vectordb_user] from login [vectordb_user]
    end

    alter role db_owner add member [vectordb_user]

end
31 |
32 |
--------------------------------------------------------------------------------
/src/sql/01-sample-queries.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

/*
    Sample queries
*/

-- Show the number of clusters generated for the wikipedia_articles_embeddings table
select
    count(distinct cluster_id)
from
    [$vector].[wikipedia_articles_embeddings$title_vector_ada2$clusters_centroids]
go

-- TITLE SEARCH
set statistics time on

-- Reference query, without using the IVFFLAT index
declare @v vector(1536);
select @v = title_vector_ada2 from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';
select top(10)
    id,
    title,
    [$distance] = vector_distance('cosine', @v, title_vector_ada2)
from
    dbo.wikipedia_articles_embeddings
order by
    [$distance]
go

-- Find the 10 most similar articles to 'Isaac Asimov' based on the title vector,
-- searching only in the closest cluster
declare @v vector(1536);
select @v = title_vector_ada2 from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';
select
    id,
    title,
    [$distance]
from
    [$vector].find_similar$wikipedia_articles_embeddings$title_vector_ada2(@v, 10, 1, 0.75)
order by
    [$distance]
go


-- Find the 10 most similar articles to 'Isaac Asimov' based on the title vector,
-- searching in the 10 closest clusters in order to improve the recall
declare @v vector(1536);
select @v = title_vector_ada2 from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';
select
    id,
    title,
    [$distance]
from
    [$vector].find_similar$wikipedia_articles_embeddings$title_vector_ada2(@v, 10, 10, 0.75)
order by
    [$distance]
go

-- Find the 10 most similar articles to 'Isaac Asimov' based on the title vector,
-- searching in all clusters (50 clusters are generated for the wikipedia_articles_embeddings table).
-- This is equivalent to a full scan of the table: it provides the best recall at the expense of performance
declare @v vector(1536);
select @v = title_vector_ada2 from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';
select top(10)
    id,
    title,
    [$distance]
from
    [$vector].find_similar$wikipedia_articles_embeddings$title_vector_ada2(@v, 10, 50, 0.75)
order by
    [$distance]
go
44 |
--------------------------------------------------------------------------------
/src/sql/02-kmeans-status.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

/*
    Kmeans clustering index details and status:
    one row per index registered in [$vector].[kmeans]
*/
select
    *
from
    [$vector].[kmeans]
--------------------------------------------------------------------------------
/src/sql/03-find-vector-cluster.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

-- Get a sample vector
declare @v nvarchar(max)
select
    @v = content_vector
from
    dbo.wikipedia_articles_embeddings
where
    title = 'Isaac Asimov'

-- Find which cluster it belongs to
select
    *
from
    [$vector].[find_cluster$wikipedia_articles_embeddings$content_vector](@v)
go
--------------------------------------------------------------------------------
/src/sql/04-add-new-vector.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

/*
    As an example, take an existing value to simulate a new item to be added
*/
declare @id int = 99999;
declare @v nvarchar(max);
select @v = content_vector from dbo.wikipedia_articles_embeddings where title = 'Isaac Asimov';

-- Any error rolls back the whole transaction
set xact_abort on
begin tran

    /*
        Insert new element into source table
    */
    insert into dbo.wikipedia_articles_embeddings
        (id, [url], title, [text], title_vector, content_vector, vector_id)
    select
        @id,
        'uri://sample',
        'Isaac Asimov Copy',
        'sample content',
        '[]' as title_vector,
        @v as content_vector,
        @id

    /*
        Insert vector: one row per element of the JSON array in @v
    */
    insert into
        [dbo].[wikipedia_articles_embeddings$content_vector]
    select
        @id,
        vector_value_id,
        vector_value
    from (
        select
            cast([key] as smallint) as vector_value_id,
            cast([value] as float) as vector_value
        from
            openjson(@v) as t
    ) v
    ;

    /*
        Add vector to cluster
    */
    declare @c int;
    select top(1)
        @c = cluster_id
    from
        [$vector].[find_cluster$wikipedia_articles_embeddings$content_vector](@v)

    insert into
        [$vector].[wikipedia_articles_embeddings$content_vector$clusters] (cluster_id, item_id)
    values
        (@c, @id);

commit tran
50 |
51 |
--------------------------------------------------------------------------------
/src/sql/05-remove-vector.sql:
--------------------------------------------------------------------------------
/*
    ** Make sure to run the script in the `vectordb` database **
*/

-- Remove item 99999 from the clusters table, the vector values table and
-- the source table, all inside one transaction
set xact_abort on
begin tran

    delete from [$vector].[wikipedia_articles_embeddings$content_vector$clusters]
    where item_id = 99999

    delete from [dbo].[wikipedia_articles_embeddings$content_vector]
    where item_id = 99999

    delete from [dbo].[wikipedia_articles_embeddings]
    where id = 99999

commit tran
--------------------------------------------------------------------------------