├── .github └── workflows │ ├── dotnet-build-debug.yml │ └── dotnet-publish-release.yml ├── .gitignore ├── AzureCosmosDB └── csharp │ ├── .gitattributes │ ├── .gitignore │ ├── DocumentVectorPipeline.sln │ ├── DocumentVectorPipelineFunctions │ ├── .editorconfig │ ├── BlobTriggerFunction.cs │ ├── CosmosDBClientWrapper.cs │ ├── DocumentChunker.cs │ ├── DocumentVectorPipelineFunctions.csproj │ ├── Program.cs │ ├── Properties │ │ ├── ServiceDependencies │ │ │ └── local │ │ │ │ ├── secrets2.arm.json │ │ │ │ └── storage1.arm.json │ │ ├── launchSettings.json │ │ ├── serviceDependencies.json │ │ └── serviceDependencies.local.json │ ├── host.json │ └── local.settings.json │ ├── README.md │ ├── azure.yaml │ ├── deployment │ ├── cosmosdb.bicep │ ├── cosmosdb.bicepparam │ ├── documentintelligence.bicep │ ├── documentintelligence.bicepparam │ ├── functionapp.bicep │ ├── functionapp.bicepparam │ ├── main.bicep │ ├── main.bicepparam │ ├── openai.bicep │ ├── openai.bicepparam │ ├── storage.bicep │ ├── storage.bicepparam │ ├── userIdentity.bicep │ └── userIdentity.bicepparam │ ├── images │ ├── enable-vector-search.png │ └── pipeline.png │ ├── infra │ ├── abbreviations.json │ ├── app │ │ ├── processor.bicep │ │ ├── storage-Access.bicep │ │ ├── storage-PrivateEndpoint.bicep │ │ └── vnet.bicep │ ├── core │ │ ├── host │ │ │ ├── appserviceplan.bicep │ │ │ └── functions-flexconsumption.bicep │ │ ├── identity │ │ │ └── userAssignedIdentity.bicep │ │ ├── monitor │ │ │ ├── appinsights-access.bicep │ │ │ ├── applicationinsights.bicep │ │ │ ├── loganalytics.bicep │ │ │ └── monitoring.bicep │ │ └── storage │ │ │ └── storage-account.bicep │ ├── main.bicep │ └── main.parameters.json │ └── next-steps.md ├── AzureSQL └── csharp │ ├── .gitattributes │ ├── .gitignore │ ├── DocumentVectorPipeline.sln │ ├── DocumentVectorPipelineFunctions │ ├── BlobTriggerFunction.cs │ ├── Document.cs │ ├── DocumentVectorPipelineFunctions.csproj │ ├── Program.cs │ ├── TextChunker.cs │ ├── host.json │ └── 
local.settings.json.sample │ ├── README.md │ ├── deployment │ ├── azuresql.bicep │ ├── azuresql.bicepparam │ ├── documentintelligence.bicep │ ├── documentintelligence.bicepparam │ ├── functionapp.bicep │ ├── functionapp.bicepparam │ ├── main.bicep │ ├── main.bicepparam │ ├── openai.bicep │ ├── openai.bicepparam │ ├── storage.bicep │ ├── storage.bicepparam │ ├── userIdentity.bicep │ └── userIdentity.bicepparam │ └── images │ ├── azuresql_managedidentity.png │ ├── azuresql_pipeline.png │ └── pipeline.png ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md └── SUPPORT.md /.github/workflows/dotnet-build-debug.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a .NET project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-net 3 | 4 | name: .NET Build and Test 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Setup .NET 20 | uses: actions/setup-dotnet@v4 21 | with: 22 | dotnet-version: 8.0.x 23 | - name: Restore dependencies 24 | run: dotnet restore AzureCosmosDB/csharp/DocumentVectorPipeline.sln 25 | - name: Build 26 | run: dotnet build --no-restore AzureCosmosDB/csharp/DocumentVectorPipeline.sln 27 | - name: Test 28 | run: dotnet test --no-build --verbosity normal AzureCosmosDB/csharp/DocumentVectorPipeline.sln 29 | -------------------------------------------------------------------------------- /.github/workflows/dotnet-publish-release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a .NET project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-net 3 | 4 | name: .NET Publish 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | 10 
| jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Setup .NET 18 | uses: actions/setup-dotnet@v4 19 | with: 20 | dotnet-version: 8.0.x 21 | - name: Publish 22 | run: dotnet publish -c Release AzureCosmosDB/csharp/DocumentVectorPipeline.sln 23 | - name: Upload publish output 24 | uses: actions/upload-artifact@v4 25 | with: 26 | name: DocumentVectorPipelineFunctions 27 | path: AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/bin/Release/net8.0/publish 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | 
project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 
174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # 
(https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | 
!.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto-detect text files 2 | * text=auto working-tree-encoding=UTF-8 3 | 4 | # VS files 5 | *.*proj text eol=crlf 6 | *.sln text eol=crlf 7 | 8 | # Bash scripts 9 | *.sh text eol=lf 10 | *.cmd text eol=crlf 11 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 
125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # NOTE: The next line is commented out, so web deploy settings (*.pubxml) are checked in. 145 | # Uncomment it to ignore them: database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 
159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produce more ignorable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. 
Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc 262 | .azure 263 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipeline.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.11.35017.193 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DocumentVectorPipelineFunctions", "DocumentVectorPipelineFunctions\DocumentVectorPipelineFunctions.csproj", "{8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {CCA21164-A156-47CF-9371-C64CAEB32C7A} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/.editorconfig: -------------------------------------------------------------------------------- 1 | # Remove the line below if you want to inherit .editorconfig settings from higher directories 2 | root = true 3 | 4 | # C# files 5 | [*.cs] 6 | 7 | #### Core EditorConfig Options #### 8 | 9 | # Indentation and spacing 10 | indent_size = 4 11 | indent_style = space 12 | tab_width = 4 13 | 14 | # New line preferences 15 | end_of_line = crlf 16 | insert_final_newline = false 17 | 18 | #### .NET Code Actions #### 19 | 20 | # Type members 21 | dotnet_hide_advanced_members = false 22 | dotnet_member_insertion_location = with_other_members_of_the_same_kind 23 | dotnet_property_generation_behavior = prefer_auto_properties 24 | 25 | # Symbol search 26 | dotnet_search_reference_assemblies = true 27 | 28 | #### .NET Coding Conventions #### 29 | 30 | # Organize usings 31 | dotnet_separate_import_directive_groups = false 32 | dotnet_sort_system_directives_first = false 33 | file_header_template = unset 34 | 35 | # this. and Me. 
preferences 36 | dotnet_style_qualification_for_event = false 37 | dotnet_style_qualification_for_field = false 38 | dotnet_style_qualification_for_method = false 39 | dotnet_style_qualification_for_property = false 40 | 41 | # Language keywords vs BCL types preferences 42 | dotnet_style_predefined_type_for_locals_parameters_members = true 43 | dotnet_style_predefined_type_for_member_access = true 44 | 45 | # Parentheses preferences 46 | dotnet_style_parentheses_in_arithmetic_binary_operators = never_if_unnecessary 47 | dotnet_style_parentheses_in_other_binary_operators = always_for_clarity 48 | dotnet_style_parentheses_in_other_operators = never_if_unnecessary 49 | dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity 50 | 51 | # Modifier preferences 52 | dotnet_style_require_accessibility_modifiers = for_non_interface_members 53 | 54 | # Expression-level preferences 55 | dotnet_prefer_system_hash_code = true 56 | dotnet_style_coalesce_expression = true 57 | dotnet_style_collection_initializer = true 58 | dotnet_style_explicit_tuple_names = true 59 | dotnet_style_namespace_match_folder = true 60 | dotnet_style_null_propagation = true 61 | dotnet_style_object_initializer = true 62 | dotnet_style_operator_placement_when_wrapping = beginning_of_line 63 | dotnet_style_prefer_auto_properties = true 64 | dotnet_style_prefer_collection_expression = when_types_loosely_match 65 | dotnet_style_prefer_compound_assignment = true 66 | dotnet_style_prefer_conditional_expression_over_assignment = true 67 | dotnet_style_prefer_conditional_expression_over_return = true 68 | dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed 69 | dotnet_style_prefer_inferred_anonymous_type_member_names = true 70 | dotnet_style_prefer_inferred_tuple_names = true 71 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:silent 72 | dotnet_style_prefer_simplified_boolean_expressions = true 73 | 
dotnet_style_prefer_simplified_interpolation = true 74 | 75 | # Field preferences 76 | dotnet_style_readonly_field = true 77 | 78 | # Parameter preferences 79 | dotnet_code_quality_unused_parameters = all 80 | 81 | # Suppression preferences 82 | dotnet_remove_unnecessary_suppression_exclusions = none 83 | 84 | # New line preferences 85 | dotnet_style_allow_multiple_blank_lines_experimental = true 86 | dotnet_style_allow_statement_immediately_after_block_experimental = true 87 | 88 | #### C# Coding Conventions #### 89 | 90 | # var preferences 91 | csharp_style_var_elsewhere = true:suggestion 92 | csharp_style_var_for_built_in_types = true:suggestion 93 | csharp_style_var_when_type_is_apparent = true:suggestion 94 | 95 | # Expression-bodied members 96 | csharp_style_expression_bodied_accessors = true:silent 97 | csharp_style_expression_bodied_constructors = false:silent 98 | csharp_style_expression_bodied_indexers = true:silent 99 | csharp_style_expression_bodied_lambdas = true:silent 100 | csharp_style_expression_bodied_local_functions = false:silent 101 | csharp_style_expression_bodied_methods = false:silent 102 | csharp_style_expression_bodied_operators = false:silent 103 | csharp_style_expression_bodied_properties = true:silent 104 | 105 | # Pattern matching preferences 106 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion 107 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 108 | csharp_style_prefer_extended_property_pattern = true:suggestion 109 | csharp_style_prefer_not_pattern = true:suggestion 110 | csharp_style_prefer_pattern_matching = true:silent 111 | csharp_style_prefer_switch_expression = true:suggestion 112 | 113 | # Null-checking preferences 114 | csharp_style_conditional_delegate_call = true:suggestion 115 | 116 | # Modifier preferences 117 | csharp_prefer_static_anonymous_function = true:suggestion 118 | csharp_prefer_static_local_function = true:suggestion 119 | csharp_preferred_modifier_order = 
public,private,protected,internal,file,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,required,volatile,async 120 | csharp_style_prefer_readonly_struct = true:suggestion 121 | csharp_style_prefer_readonly_struct_member = true:suggestion 122 | 123 | # Code-block preferences 124 | csharp_prefer_braces = true:suggestion 125 | csharp_prefer_simple_using_statement = true:suggestion 126 | csharp_prefer_system_threading_lock = true:suggestion 127 | csharp_style_namespace_declarations = file_scoped:suggestion 128 | csharp_style_prefer_method_group_conversion = true:silent 129 | csharp_style_prefer_primary_constructors = true:suggestion 130 | csharp_style_prefer_top_level_statements = true:silent 131 | 132 | # Expression-level preferences 133 | csharp_prefer_simple_default_expression = true:silent 134 | csharp_style_deconstructed_variable_declaration = true:silent 135 | csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion 136 | csharp_style_inlined_variable_declaration = true:suggestion 137 | csharp_style_prefer_index_operator = true:suggestion 138 | csharp_style_prefer_local_over_anonymous_function = true:silent 139 | csharp_style_prefer_null_check_over_type_check = true:suggestion 140 | csharp_style_prefer_range_operator = true:suggestion 141 | csharp_style_prefer_tuple_swap = true:suggestion 142 | csharp_style_prefer_utf8_string_literals = true:suggestion 143 | csharp_style_throw_expression = true:suggestion 144 | csharp_style_unused_value_assignment_preference = discard_variable:suggestion 145 | csharp_style_unused_value_expression_statement_preference = discard_variable:silent 146 | 147 | # 'using' directive preferences 148 | csharp_using_directive_placement = outside_namespace:silent 149 | 150 | # New line preferences 151 | csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent 152 | csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true:silent 153 | 
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true:silent 154 | csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent 155 | csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent 156 | 157 | #### C# Formatting Rules #### 158 | 159 | # New line preferences 160 | csharp_new_line_before_catch = true 161 | csharp_new_line_before_else = true 162 | csharp_new_line_before_finally = true 163 | csharp_new_line_before_members_in_anonymous_types = true 164 | csharp_new_line_before_members_in_object_initializers = true 165 | csharp_new_line_before_open_brace = all 166 | csharp_new_line_between_query_expression_clauses = true 167 | 168 | # Indentation preferences 169 | csharp_indent_block_contents = true 170 | csharp_indent_braces = false 171 | csharp_indent_case_contents = true 172 | csharp_indent_case_contents_when_block = true 173 | csharp_indent_labels = one_less_than_current 174 | csharp_indent_switch_labels = true 175 | 176 | # Space preferences 177 | csharp_space_after_cast = false 178 | csharp_space_after_colon_in_inheritance_clause = true 179 | csharp_space_after_comma = true 180 | csharp_space_after_dot = false 181 | csharp_space_after_keywords_in_control_flow_statements = true 182 | csharp_space_after_semicolon_in_for_statement = true 183 | csharp_space_around_binary_operators = before_and_after 184 | csharp_space_around_declaration_statements = false 185 | csharp_space_before_colon_in_inheritance_clause = true 186 | csharp_space_before_comma = false 187 | csharp_space_before_dot = false 188 | csharp_space_before_open_square_brackets = false 189 | csharp_space_before_semicolon_in_for_statement = false 190 | csharp_space_between_empty_square_brackets = false 191 | csharp_space_between_method_call_empty_parameter_list_parentheses = false 192 | csharp_space_between_method_call_name_and_opening_parenthesis = false 193 | csharp_space_between_method_call_parameter_list_parentheses = 
false 194 | csharp_space_between_method_declaration_empty_parameter_list_parentheses = false 195 | csharp_space_between_method_declaration_name_and_open_parenthesis = false 196 | csharp_space_between_method_declaration_parameter_list_parentheses = false 197 | csharp_space_between_parentheses = false 198 | csharp_space_between_square_brackets = false 199 | 200 | # Wrapping preferences 201 | csharp_preserve_single_line_blocks = true 202 | csharp_preserve_single_line_statements = true 203 | 204 | #### Naming styles #### 205 | 206 | # Naming rules 207 | 208 | dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion 209 | dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface 210 | dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i 211 | 212 | dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion 213 | dotnet_naming_rule.types_should_be_pascal_case.symbols = types 214 | dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case 215 | 216 | dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion 217 | dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members 218 | dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case 219 | 220 | # Symbol specifications 221 | 222 | dotnet_naming_symbols.interface.applicable_kinds = interface 223 | dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 224 | dotnet_naming_symbols.interface.required_modifiers = 225 | 226 | dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum 227 | dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 228 | dotnet_naming_symbols.types.required_modifiers = 229 | 230 | dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method 231 | 
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 232 | dotnet_naming_symbols.non_field_members.required_modifiers = 233 | 234 | # Naming styles 235 | 236 | dotnet_naming_style.pascal_case.required_prefix = 237 | dotnet_naming_style.pascal_case.required_suffix = 238 | dotnet_naming_style.pascal_case.word_separator = 239 | dotnet_naming_style.pascal_case.capitalization = pascal_case 240 | 241 | dotnet_naming_style.begins_with_i.required_prefix = I 242 | dotnet_naming_style.begins_with_i.required_suffix = 243 | dotnet_naming_style.begins_with_i.word_separator = 244 | dotnet_naming_style.begins_with_i.capitalization = pascal_case 245 | 246 | [*.{cs,vb}] 247 | dotnet_style_operator_placement_when_wrapping = beginning_of_line 248 | tab_width = 4 249 | indent_size = 4 250 | end_of_line = crlf 251 | dotnet_style_coalesce_expression = true:suggestion 252 | dotnet_style_null_propagation = true:suggestion 253 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion 254 | dotnet_style_prefer_auto_properties = true:suggestion 255 | dotnet_style_object_initializer = true:suggestion 256 | dotnet_style_collection_initializer = true:suggestion 257 | dotnet_style_prefer_simplified_boolean_expressions = true:suggestion 258 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent 259 | dotnet_style_prefer_conditional_expression_over_return = true:silent 260 | dotnet_style_explicit_tuple_names = true:suggestion 261 | dotnet_style_prefer_inferred_tuple_names = true:suggestion 262 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion 263 | dotnet_style_prefer_compound_assignment = true:suggestion 264 | dotnet_style_prefer_simplified_interpolation = true:suggestion 265 | dotnet_style_prefer_collection_expression = when_types_loosely_match:suggestion 266 | dotnet_style_namespace_match_folder = true:suggestion 267 | 
dotnet_style_readonly_field = true:suggestion 268 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion 269 | dotnet_style_predefined_type_for_member_access = true:suggestion 270 | dotnet_style_require_accessibility_modifiers = for_non_interface_members:suggestion 271 | dotnet_style_allow_multiple_blank_lines_experimental = true:silent 272 | dotnet_style_allow_statement_immediately_after_block_experimental = true:silent 273 | dotnet_code_quality_unused_parameters = all:suggestion 274 | dotnet_style_parentheses_in_arithmetic_binary_operators = never_if_unnecessary:silent 275 | dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent 276 | dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent 277 | dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent 278 | dotnet_style_qualification_for_field = false:silent 279 | dotnet_style_qualification_for_property = false:silent 280 | dotnet_style_qualification_for_method = false:silent 281 | dotnet_style_qualification_for_event = false:silent -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/BlobTriggerFunction.cs: -------------------------------------------------------------------------------- 1 | using System.ClientModel; 2 | using System.IO; 3 | using System.Net; 4 | using System.Text; 5 | using System.Threading; 6 | using Azure; 7 | using Azure.AI.FormRecognizer.DocumentAnalysis; 8 | using Azure.Storage.Blobs; 9 | using Microsoft.Azure.Cosmos; 10 | using Microsoft.Azure.Functions.Worker; 11 | using Microsoft.Extensions.Configuration; 12 | using Microsoft.Extensions.Logging; 13 | using OpenAI.Embeddings; 14 | 15 | namespace DocumentVectorPipelineFunctions; 16 | 17 | public class BlobTriggerFunction( 18 | IConfiguration configuration, 19 | DocumentAnalysisClient documentAnalysisClient, 20 | ILoggerFactory loggerFactory, 21 | CosmosClient 
cosmosClient, 22 | EmbeddingClient embeddingClient) 23 | { 24 | private readonly ILogger _logger = loggerFactory.CreateLogger(); 25 | 26 | private const string AzureOpenAIModelDeploymentDimensionsName = "AzureOpenAIModelDimensions"; 27 | private static readonly int DefaultDimensions = 1536; 28 | 29 | private const string MaxTokensPerChunkName = "MaxTokensPerChunk"; 30 | private const string OverlapTokensName = "OverlapTokens"; 31 | 32 | private const int MaxRetryCount = 100; 33 | private const int RetryDelay = 10 * 1000; // 10 seconds 34 | 35 | private const int MaxBatchSize = 10; 36 | private const int MaxDegreeOfParallelism = 50; 37 | 38 | private int embeddingDimensions = DefaultDimensions; 39 | 40 | [Function("BlobTriggerFunction")] 41 | public async Task Run([BlobTrigger("documents/{name}", Connection = "AzureBlobStorageAccConnectionString")] BlobClient blobClient) 42 | { 43 | this._logger.LogInformation("Starting processing of blob name: '{name}'", blobClient.Name); 44 | 45 | if (await blobClient.ExistsAsync()) 46 | { 47 | await this.HandleBlobCreateEventAsync(blobClient); 48 | } 49 | else 50 | { 51 | await this.HandleBlobDeleteEventAsync(blobClient); 52 | } 53 | this._logger.LogInformation("Finished processing of blob name: '{name}'", blobClient.Name); 54 | } 55 | 56 | private async Task HandleBlobCreateEventAsync(BlobClient blobClient) 57 | { 58 | var cosmosDBClientWrapper = await CosmosDBClientWrapper.CreateInstance(cosmosClient, this._logger); 59 | 60 | this.embeddingDimensions = configuration.GetValue(AzureOpenAIModelDeploymentDimensionsName, DefaultDimensions); 61 | this._logger.LogInformation("Using OpenAI model dimensions: '{embeddingDimensions}'.", this.embeddingDimensions); 62 | 63 | var maxTokensPerChunk = configuration.GetValue(MaxTokensPerChunkName, DocumentChunker.DefaultMaxTokensPerChunk); 64 | var overlapTokens = configuration.GetValue(OverlapTokensName, DocumentChunker.DefaultOverlapTokens); 65 | 66 | var extension = 
Path.GetExtension(blobClient.Name); 67 | var textChunks = new List(); 68 | if (extension == ".txt") 69 | { 70 | using var stream = await blobClient.OpenReadAsync(); 71 | var lines = await ReadAllLinesAsync(stream); 72 | textChunks.AddRange(DocumentChunker.ChunkTextLines( 73 | lines, maxTokensPerChunk, overlapTokens)); 74 | } 75 | else if (extension == ".md") 76 | { 77 | using var stream = await blobClient.OpenReadAsync(); 78 | var lines = await ReadAllLinesAsync(stream); 79 | textChunks.AddRange(DocumentChunker.ChunkMarkdownLines( 80 | lines, maxTokensPerChunk, overlapTokens)); 81 | } 82 | else 83 | { 84 | this._logger.LogInformation("Analyzing document using DocumentAnalyzerService from blobUri: '{blobUri}' using layout: {layout}", blobClient.Name, "prebuilt-read"); 85 | 86 | using var memoryStream = new MemoryStream(); 87 | await blobClient.DownloadToAsync(memoryStream); 88 | memoryStream.Seek(0, SeekOrigin.Begin); 89 | 90 | var operation = await documentAnalysisClient.AnalyzeDocumentAsync( 91 | WaitUntil.Completed, 92 | "prebuilt-read", 93 | memoryStream); 94 | 95 | var result = operation.Value; 96 | this._logger.LogInformation("Extracted content from '{name}', # pages {pageCount}", blobClient.Name, result.Pages.Count); 97 | 98 | textChunks.AddRange(DocumentChunker.FixedSizeChunking(result, maxTokensPerChunk, overlapTokens)); 99 | } 100 | 101 | var listOfBatches = textChunks.Chunk(MaxBatchSize).ToList(); 102 | 103 | this._logger.LogInformation("Processing list of batches in parallel, total batches: {listSize}, chunks count: {chunksCount}", listOfBatches.Count, textChunks.Count); 104 | await Parallel.ForEachAsync(listOfBatches, new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, async (batchChunkText, cancellationToken) => 105 | { 106 | this._logger.LogInformation("Processing batch of size: {batchSize}", batchChunkText.Length); 107 | await this.ProcessCurrentBatchAsync(blobClient, cosmosDBClientWrapper, [.. 
batchChunkText], cancellationToken); 108 | }); 109 | 110 | this._logger.LogInformation("Finished processing blob {name}, total chunks processed {count}.", blobClient.Name, textChunks.Count); 111 | } 112 | // Embeds one batch of chunks, then upserts the resulting documents into Cosmos DB. 113 | private async Task ProcessCurrentBatchAsync(BlobClient blobClient, CosmosDBClientWrapper cosmosDBClientWrapper, List batchChunkTexts, CancellationToken cancellationToken) 114 | { 115 | this._logger.LogInformation("Generating embeddings for batch of size: '{size}'.", batchChunkTexts.Count); 116 | var embeddings = await this.GenerateEmbeddingsWithRetryAsync(batchChunkTexts); 117 | 118 | this._logger.LogInformation("Creating Cosmos DB documents for batch of size {count}", batchChunkTexts.Count); 119 | await cosmosDBClientWrapper.UpsertDocumentsAsync(blobClient.Uri.AbsoluteUri, batchChunkTexts, embeddings, cancellationToken); 120 | } 121 | // Generates embeddings for one batch; on HTTP 429/401 waits RetryDelay and retries, up to MaxRetryCount attempts. Any other ClientResultException fails immediately. 122 | private async Task GenerateEmbeddingsWithRetryAsync(IEnumerable batchChunkTexts) 123 | { 124 | var embeddingGenerationOptions = new EmbeddingGenerationOptions() 125 | { 126 | Dimensions = this.embeddingDimensions 127 | }; 128 | 129 | var retryCount = 0; 130 | while (retryCount < MaxRetryCount) 131 | { 132 | try 133 | { 134 | return await embeddingClient.GenerateEmbeddingsAsync(batchChunkTexts.Select(p => p.Text).ToList(), embeddingGenerationOptions); 135 | } 136 | catch (ClientResultException ex) 137 | { 138 | if (ex.Status is ((int)HttpStatusCode.TooManyRequests) or ((int)HttpStatusCode.Unauthorized)) 139 | { 140 | if (retryCount >= MaxRetryCount) 141 | { 142 | throw new Exception($"Max retry attempts reached generating embeddings with exception: {ex}."); 143 | } 144 | 145 | retryCount++; 146 | 147 | await Task.Delay(RetryDelay); 148 | } 149 | else 150 | { 151 | throw new Exception($"Failed to generate embeddings with error: {ex}."); 152 | } 153 | } 154 | } 155 | // Retry budget exhausted without a successful response. 156 | throw new Exception($"Failed to generate embeddings after retrying for {MaxRetryCount} times."); 157 | } 158 | 159 | private async Task
HandleBlobDeleteEventAsync(BlobClient blobClient) 160 | { 161 | // TODO (amisi) - Implement me. 162 | this._logger.LogInformation("Handling delete event for blob name {name}.", blobClient.Name); 163 | 164 | await Task.Delay(1); 165 | } 166 | 167 | private static async Task> ReadAllLinesAsync( 168 | Stream inputStream, 169 | CancellationToken cancellationToken = default) 170 | { 171 | using var sr = new StreamReader( 172 | inputStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true); 173 | 174 | cancellationToken.ThrowIfCancellationRequested(); 175 | string? line; 176 | var lines = new List(); 177 | while ((line = await sr.ReadLineAsync(cancellationToken)) != null) 178 | { 179 | lines.Add(line); 180 | cancellationToken.ThrowIfCancellationRequested(); 181 | } 182 | 183 | return lines; 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/CosmosDBClientWrapper.cs: -------------------------------------------------------------------------------- 1 | using System.Globalization; 2 | using System.Net; 3 | using System.Text.Json.Serialization; 4 | using Microsoft.Azure.Cosmos; 5 | using Microsoft.Extensions.Logging; 6 | using OpenAI.Embeddings; 7 | using Container = Microsoft.Azure.Cosmos.Container; 8 | 9 | namespace DocumentVectorPipelineFunctions; 10 | 11 | internal class CosmosDBClientWrapper 12 | { 13 | private readonly CosmosClient client; 14 | private readonly ILogger logger; 15 | private Container? container; 16 | 17 | private static CosmosDBClientWrapper? 
instance; 18 | 19 | private const int MaxRetryCount = 100; 20 | 21 | public static async ValueTask CreateInstance(CosmosClient client, ILogger logger) 22 | { 23 | if (instance != null) 24 | { 25 | return instance; 26 | } 27 | 28 | var curInstance = new CosmosDBClientWrapper(client, logger); 29 | await curInstance.GetOrCreateDatabaseAndContainerAsync(); 30 | 31 | instance = curInstance; 32 | 33 | return instance; 34 | } 35 | // Builds one document per chunk (pairing chunks[i] with embeddings[i]) and upserts them all concurrently; on failure logs Cosmos response headers for every failed upsert, then rethrows. 36 | public async Task UpsertDocumentsAsync(string fileUri, List chunks, EmbeddingCollection embeddings, CancellationToken cancellationToken) 37 | { 38 | if (this.container == null) 39 | { 40 | throw new InvalidOperationException("Container is not initialized."); 41 | } 42 | 43 | var upsertTasks = new List>>(); 44 | for (var index = 0; index < chunks.Count; index++) 45 | { 46 | var documentChunk = new DocumentChunk 47 | { 48 | ChunkId = chunks[index].ChunkNumber.ToString("d", CultureInfo.InvariantCulture), 49 | DocumentUrl = fileUri, 50 | Embedding = embeddings[index].Vector, 51 | ChunkText = chunks[index].Text, 52 | }; 53 | upsertTasks.Add(this.UpsertDocumentWithRetryAsync(documentChunk, CosmosDBClientWrapper.MaxRetryCount, cancellationToken)); 54 | } 55 | var allUpserts = Task.WhenAll(upsertTasks); // keep the combined task: await rethrows only the first failure, while its Exception property aggregates all of them 56 | try 57 | { 58 | await allUpserts; 59 | } 60 | catch (Exception) when (allUpserts.Exception is { } aggEx) 61 | { 62 | foreach (var item in aggEx.InnerExceptions) 63 | { 64 | if (item is CosmosException cosmosException) 65 | { 66 | this.LogHeaders(cosmosException.Headers); 67 | } 68 | } 69 | 70 | throw; 71 | } 72 | } 73 | 74 | private async Task> UpsertDocumentWithRetryAsync( 75 | DocumentChunk document, 76 | int maxRetryAttempts, 77 | CancellationToken cancellationToken) 78 | { 79 | if (this.container == null) 80 | { 81 | throw new InvalidOperationException("Container is not initialized."); 82 | } 83 | 84 | var retryCount = 0; 85 | while (retryCount < maxRetryAttempts) 86 | { 87 | try 88 | { 89 | return await this.container.UpsertItemAsync(document, cancellationToken: cancellationToken); 90 | } 91 |
catch (CosmosException ex) when (ex.StatusCode == HttpStatusCode.TooManyRequests) 92 | { 93 | retryCount++; 94 | await Task.Delay(ex.RetryAfter.GetValueOrDefault(), cancellationToken); 95 | } 96 | catch (Exception ex) 97 | { 98 | this.logger.LogError("An error occurred while upserting document with ID {chunkId}: {exceptionMessage}", document.ChunkId, ex.Message); 99 | throw; 100 | } 101 | } 102 | 103 | throw new Exception($"Max retry attempts reached for document with ID {document.ChunkId}. Operation failed."); 104 | } 105 | 106 | private CosmosDBClientWrapper(CosmosClient client, ILogger logger) 107 | { 108 | this.client = client; 109 | this.logger = logger; 110 | } 111 | 112 | private async Task GetOrCreateDatabaseAndContainerAsync() 113 | { 114 | var dbResponse = await this.client.CreateDatabaseIfNotExistsAsync("semantic_search_db"); 115 | 116 | var indexingPolicy = new IndexingPolicy() 117 | { 118 | // TODO: Include Full-Text Index for the chunk_text property. 119 | VectorIndexes = 120 | [ 121 | new VectorIndexPath 122 | { 123 | Path = "/embedding", 124 | Type = VectorIndexType.QuantizedFlat, 125 | } 126 | ] 127 | }; 128 | indexingPolicy.ExcludedPaths.Add(new ExcludedPath { Path = "/embedding/*" }); 129 | var containerResponse = await dbResponse.Database.CreateContainerIfNotExistsAsync(new ContainerProperties 130 | { 131 | Id = "doc_search_container", 132 | PartitionKeyPath = "/document_url", 133 | 134 | IndexingPolicy = indexingPolicy, 135 | VectorEmbeddingPolicy = new( 136 | [ 137 | new Microsoft.Azure.Cosmos.Embedding 138 | { 139 | DataType = VectorDataType.Float32, 140 | Dimensions = 1536, 141 | DistanceFunction = DistanceFunction.Cosine, 142 | Path = "/embedding" 143 | }, 144 | ]), 145 | }); 146 | 147 | this.container = containerResponse.Container; 148 | if (containerResponse.StatusCode != System.Net.HttpStatusCode.OK) 149 | { 150 | this.LogHeaders(containerResponse.Headers); 151 | } 152 | } 153 | 154 | private void LogHeaders(Headers headers) 155 | { 156 
| using var scope = this.logger.BeginScope("Created a container."); 157 | 158 | foreach (var headerName in headers.AllKeys()) 159 | { 160 | this.logger.LogWarning("Header: {header}, Value: '{value}'", headerName, headers[headerName]); 161 | } 162 | } 163 | 164 | private class DocumentChunk 165 | { 166 | [JsonPropertyName("id")] 167 | public string? ChunkId { get; init; } 168 | 169 | [JsonPropertyName("document_url")] 170 | public string? DocumentUrl { get; init; } 171 | 172 | [JsonPropertyName("chunk_text")] 173 | public string? ChunkText { get; init; } 174 | 175 | [JsonPropertyName("embedding")] 176 | public ReadOnlyMemory Embedding { get; init; } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/DocumentChunker.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using Azure.AI.FormRecognizer.DocumentAnalysis; 3 | using Microsoft.SemanticKernel.Text; 4 | 5 | namespace DocumentVectorPipelineFunctions; 6 | 7 | internal record struct TextChunk( 8 | string Text, 9 | int ChunkNumber); 10 | 11 | internal static class DocumentChunker 12 | { 13 | public const int DefaultMaxTokensPerChunk = 250; 14 | public const int DefaultOverlapTokens = 0; 15 | 16 | #pragma warning disable SKEXP0050 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. 17 | 18 | public static IEnumerable FixedSizeChunking( 19 | AnalyzeResult? result, 20 | int maxTokensPerChunk, 21 | int overlapTokens) 22 | { 23 | if (result == null) 24 | { 25 | return []; 26 | } 27 | 28 | // Handle different types of output from Azure Document Intelligence. 29 | // This happens for different types of input. In particular, .docx files 30 | // don't seem to have lines populated. 31 | // 32 | // If it has a collection of pages with lines, use that. 
33 | // 34 | // Otherwise if there are paragraphs, we'll use them as input. 35 | // 36 | // Third, we'll use the "words" collection of each page, building it up into a 37 | // roughly line sized blocks to pass in. 38 | // 39 | // Finally, if there is nothing else, we'll fall back to the Content property. 40 | IEnumerable lines; 41 | if (result.Pages?.Count > 0 && result.Pages?[0]?.Lines?.Count > 0) 42 | { 43 | lines = result.Pages.SelectMany(page => page.Lines.Select(line => line.Content)); 44 | } 45 | else if (result.Paragraphs?.Count > 0) 46 | { 47 | lines = result.Paragraphs.Select(para => para.Content); 48 | } 49 | else if (result.Pages?.Count > 0 && result.Pages?[0]?.Words?.Count > 0) 50 | { 51 | lines = SplitWords(result); 52 | } 53 | else 54 | { 55 | lines = [result.Content]; 56 | } 57 | 58 | var chunkNumber = 0; 59 | return TextChunker.SplitPlainTextParagraphs(lines, maxTokensPerChunk, overlapTokens) 60 | .Select(para => new TextChunk(para, chunkNumber++)); 61 | } 62 | // Chunks plain-text lines via TextChunker.SplitPlainTextParagraphs (same splitter FixedSizeChunking uses); chunks are numbered from 0 as the result is enumerated. 63 | public static IEnumerable ChunkTextLines( 64 | IEnumerable lines, 65 | int maxTokensPerChunk, 66 | int overlapTokens) 67 | { 68 | var chunkNumber = 0; 69 | return TextChunker.SplitPlainTextParagraphs(lines, maxTokensPerChunk, overlapTokens) 70 | .Select(para => new TextChunk(para, chunkNumber++)); 71 | } 72 | // Chunks markdown lines via TextChunker.SplitMarkdownParagraphs; chunks are numbered from 0 as the result is enumerated. 73 | public static IEnumerable ChunkMarkdownLines( 74 | IEnumerable lines, 75 | int maxTokensPerChunk, 76 | int overlapTokens) 77 | { 78 | var chunkNumber = 0; 79 | return TextChunker.SplitMarkdownParagraphs(lines, maxTokensPerChunk, overlapTokens) 80 | .Select(para => new TextChunk(para, chunkNumber++)); 81 | } 82 | 83 | #pragma warning restore SKEXP0050 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
84 | 85 | private const int MaxChunkWordCount = 40; 86 | 87 | private static IEnumerable SplitWords(AnalyzeResult result) 88 | { 89 | var sb = new StringBuilder(MaxChunkWordCount); 90 | var wordCount = 0; 91 | foreach (var page in result.Pages) 92 | { 93 | foreach (var word in page.Words) 94 | { 95 | sb.Append(word.Content).Append(' '); 96 | wordCount++; 97 | if (wordCount > MaxChunkWordCount) 98 | { 99 | sb.Length -= 1; 100 | var chunk = sb.ToString(); 101 | sb.Clear(); 102 | wordCount = 0; 103 | 104 | yield return chunk; 105 | } 106 | } 107 | } 108 | 109 | if (sb.Length > 0) 110 | { 111 | sb.Length -= 1; 112 | var chunk = sb.ToString(); 113 | yield return chunk; 114 | } 115 | } 116 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/DocumentVectorPipelineFunctions.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | net8.0 4 | v4 5 | Exe 6 | enable 7 | enable 8 | 4790278e-5c51-4fec-a397-f7eaa13e76f5 9 | False 10 | 0.1.0.0 11 | preview.1 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | PreserveNewest 35 | 36 | 37 | PreserveNewest 38 | Never 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Program.cs: -------------------------------------------------------------------------------- 1 | using System.ClientModel.Primitives; 2 | using System.Text.Json; 3 | using Azure.AI.FormRecognizer.DocumentAnalysis; 4 | using Azure.AI.OpenAI; 5 | using Azure.Core; 6 | using Azure.Identity; 7 | using DocumentVectorPipelineFunctions; 8 | using Microsoft.Azure.Cosmos; 9 | using Microsoft.Extensions.Configuration; 10 | using Microsoft.Extensions.DependencyInjection; 11 | using Microsoft.Extensions.Hosting; 12 | using OpenAI.Embeddings; 13 | 14 | const string 
AzureDocumentIntelligenceEndpointConfigName = "AzureDocumentIntelligenceConnectionString"; 15 | const string AzureCosmosDBConnectionString = "AzureCosmosDBConnectionString"; 16 | const string AzureOpenAIConnectionString = "AzureOpenAIConnectionString"; 17 | const string AzureOpenAIModelDeploymentConfigName = "AzureOpenAIModelDeployment"; 18 | 19 | var managedIdentityClientId = Environment.GetEnvironmentVariable("AzureManagedIdentityClientId"); 20 | var local = Convert.ToBoolean(Environment.GetEnvironmentVariable("RunningLocally")); 21 | 22 | TokenCredential credential = local 23 | ? new DefaultAzureCredential() 24 | : new ManagedIdentityCredential(clientId: managedIdentityClientId); 25 | 26 | var hostBuilder = new HostBuilder() 27 | .ConfigureFunctionsWorkerDefaults() 28 | .ConfigureAppConfiguration(config => 29 | { 30 | config.AddUserSecrets(optional: true, reloadOnChange: false); 31 | }); 32 | 33 | hostBuilder.ConfigureServices(sc => 34 | { 35 | sc.AddSingleton(sp => 36 | { 37 | var config = sp.GetRequiredService(); 38 | var documentIntelligenceEndpoint = config[AzureDocumentIntelligenceEndpointConfigName] ?? throw new Exception($"Configure {AzureDocumentIntelligenceEndpointConfigName}"); 39 | var documentAnalysisClient = new DocumentAnalysisClient( 40 | new Uri(documentIntelligenceEndpoint), 41 | credential); 42 | return documentAnalysisClient; 43 | }); 44 | sc.AddSingleton(sp => 45 | { 46 | var config = sp.GetRequiredService(); 47 | var cosmosDbEndpoint = config[AzureCosmosDBConnectionString] ?? 
throw new Exception($"Configure {AzureCosmosDBConnectionString}"); 48 | var cosmosClient = new CosmosClient( 49 | cosmosDbEndpoint, 50 | credential, 51 | new CosmosClientOptions 52 | { 53 | ApplicationName = "document ingestion", 54 | AllowBulkExecution = true, 55 | UseSystemTextJsonSerializerWithOptions = JsonSerializerOptions.Default, 56 | }); 57 | return cosmosClient; 58 | }); 59 | sc.AddSingleton(sp => 60 | { 61 | var config = sp.GetRequiredService(); 62 | var openAIEndpoint = config[AzureOpenAIConnectionString] ?? throw new Exception($"Configure {AzureOpenAIConnectionString}"); 63 | // TODO: Implement a custom retry policy that takes the retry-after header into account. 64 | var azureOpenAIClient = new AzureOpenAIClient( 65 | new Uri(openAIEndpoint), 66 | credential, 67 | new AzureOpenAIClientOptions() 68 | { 69 | ApplicationId = "DocumentIngestion", 70 | RetryPolicy = new ClientRetryPolicy(maxRetries: 10), 71 | }); 72 | return azureOpenAIClient.GetEmbeddingClient(config[AzureOpenAIModelDeploymentConfigName] ?? throw new Exception($"Configure {AzureOpenAIModelDeploymentConfigName}")); 73 | }); 74 | }); 75 | 76 | var host = hostBuilder.Build(); 77 | host.Run(); 78 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Properties/ServiceDependencies/local/secrets2.arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceGroupName": { 6 | "type": "string", 7 | "defaultValue": "kevinpi-rg", 8 | "metadata": { 9 | "_parameterType": "resourceGroup", 10 | "description": "Name of the resource group for the resource. It is recommended to put resources under same resource group for better tracking."
11 | } 12 | }, 13 | "resourceGroupLocation": { 14 | "type": "string", 15 | "defaultValue": "westcentralus", 16 | "metadata": { 17 | "_parameterType": "location", 18 | "description": "Location of the resource group. Resource groups could have different location than resources." 19 | } 20 | }, 21 | "resourceLocation": { 22 | "type": "string", 23 | "defaultValue": "[parameters('resourceGroupLocation')]", 24 | "metadata": { 25 | "_parameterType": "location", 26 | "description": "Location of the resource. By default use resource group's location, unless the resource provider is not supported there." 27 | } 28 | } 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Resources/resourceGroups", 33 | "name": "[parameters('resourceGroupName')]", 34 | "location": "[parameters('resourceGroupLocation')]", 35 | "apiVersion": "2019-10-01" 36 | }, 37 | { 38 | "type": "Microsoft.Resources/deployments", 39 | "name": "[concat(parameters('resourceGroupName'), 'Deployment', uniqueString(concat('docing-kv', subscription().subscriptionId)))]", 40 | "resourceGroup": "[parameters('resourceGroupName')]", 41 | "apiVersion": "2019-10-01", 42 | "dependsOn": [ 43 | "[parameters('resourceGroupName')]" 44 | ], 45 | "properties": { 46 | "mode": "Incremental", 47 | "template": { 48 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 49 | "contentVersion": "1.0.0.0", 50 | "resources": [ 51 | { 52 | "name": "docing-kv", 53 | "type": "Microsoft.KeyVault/vaults", 54 | "location": "[parameters('resourceLocation')]", 55 | "properties": { 56 | "sku": { 57 | "family": "A", 58 | "name": "Standard" 59 | }, 60 | "tenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47", 61 | "accessPolicies": [], 62 | "enabledForDeployment": true, 63 | "enabledForDiskEncryption": true, 64 | "enabledForTemplateDeployment": true, 65 | "enableSoftDelete": true 66 | }, 67 | "apiVersion": "2016-10-01" 68 | } 69 | ] 70 | } 71 | } 72 | } 73 | ], 74 | "metadata": { 75 | "_dependencyType": 
"secrets.keyVault" 76 | } 77 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Properties/ServiceDependencies/local/storage1.arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceGroupName": { 6 | "type": "string", 7 | "defaultValue": "kevinpi-rg", 8 | "metadata": { 9 | "_parameterType": "resourceGroup", 10 | "description": "Name of the resource group for the resource. It is recommended to put resources under same resource group for better tracking." 11 | } 12 | }, 13 | "resourceGroupLocation": { 14 | "type": "string", 15 | "defaultValue": "westcentralus", 16 | "metadata": { 17 | "_parameterType": "location", 18 | "description": "Location of the resource group. Resource groups could have different location than resources." 19 | } 20 | }, 21 | "resourceLocation": { 22 | "type": "string", 23 | "defaultValue": "[parameters('resourceGroupLocation')]", 24 | "metadata": { 25 | "_parameterType": "location", 26 | "description": "Location of the resource. By default use resource group's location, unless the resource provider is not supported there." 
27 | } 28 | } 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Resources/resourceGroups", 33 | "name": "[parameters('resourceGroupName')]", 34 | "location": "[parameters('resourceGroupLocation')]", 35 | "apiVersion": "2019-10-01" 36 | }, 37 | { 38 | "type": "Microsoft.Resources/deployments", 39 | "name": "[concat(parameters('resourceGroupName'), 'Deployment', uniqueString(concat('docingstorage', subscription().subscriptionId)))]", 40 | "resourceGroup": "[parameters('resourceGroupName')]", 41 | "apiVersion": "2019-10-01", 42 | "dependsOn": [ 43 | "[parameters('resourceGroupName')]" 44 | ], 45 | "properties": { 46 | "mode": "Incremental", 47 | "template": { 48 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 49 | "contentVersion": "1.0.0.0", 50 | "resources": [ 51 | { 52 | "sku": { 53 | "name": "Standard_LRS", 54 | "tier": "Standard" 55 | }, 56 | "kind": "StorageV2", 57 | "name": "docingstorage", 58 | "type": "Microsoft.Storage/storageAccounts", 59 | "location": "[parameters('resourceLocation')]", 60 | "apiVersion": "2017-10-01" 61 | } 62 | ] 63 | } 64 | } 65 | } 66 | ], 67 | "metadata": { 68 | "_dependencyType": "storage.azure" 69 | } 70 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "BlobStorageTriggeredFunction": { 4 | "commandName": "Project", 5 | "commandLineArgs": "--port 7221", 6 | "launchBrowser": false 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Properties/serviceDependencies.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "secrets1": { 4 | "type": "secrets" 5 | }, 6 | "storage1": { 7 | "type": 
"storage", 8 | "connectionId": "MyStorageConnectionString", 9 | "dynamicId": null 10 | }, 11 | "secrets2": { 12 | "type": "secrets", 13 | "connectionId": "VaultUri" 14 | }, 15 | "storage2": { 16 | "type": "storage", 17 | "connectionId": "AzureWebJobsStorage" 18 | }, 19 | "appInsights1": { 20 | "type": "appInsights", 21 | "connectionId": "APPINSIGHTS_INSTRUMENTATIONKEY" 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/Properties/serviceDependencies.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "secrets1": { 4 | "type": "secrets.user" 5 | }, 6 | "storage1": { 7 | "serviceConnectorResourceId": "/subscriptions/[parameters('subscriptionId')]/resourceGroups/[parameters('resourceGroupName')]/providers/Microsoft.ServiceLinker/locations/westcentralus/connectors/MyStorageConnectionString_753B1FC339", 8 | "secretStore": "LocalSecretsFile", 9 | "resourceId": "/subscriptions/[parameters('subscriptionId')]/resourceGroups/[parameters('resourceGroupName')]/providers/Microsoft.Storage/storageAccounts/docingstorage", 10 | "type": "storage.azure", 11 | "connectionId": "MyStorageConnectionString", 12 | "dynamicId": null 13 | }, 14 | "secrets2": { 15 | "secretStore": null, 16 | "resourceId": "/subscriptions/[parameters('subscriptionId')]/resourceGroups/[parameters('resourceGroupName')]/providers/Microsoft.KeyVault/vaults/docing-kv", 17 | "type": "secrets.keyVault", 18 | "connectionId": "VaultUri" 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 
10 | } 11 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/DocumentVectorPipelineFunctions/local.settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "AzureWebJobsStorage": "AzureWebJobsStorageConnectionStringValue", 5 | "FUNCTIONS_WORKER_RUNTIME": "dotnet-isolated" 6 | } 7 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/README.md: -------------------------------------------------------------------------------- 1 | # Azure Cosmos DB Document Ingestion & Processing Pipeline - Solution Accelerator 2 | 3 | ## Background 4 | This solution accelerator configures a simple pipeline for ingesting content stored in document form (pdf, docx, etc) into a searchable index in Azure Cosmos DB for NoSQL. 5 | 6 | **Let us know what you think!** Please reach out to CDB4AI@Microsoft.com with feedback and create a GitHub issue for any bugs/feature requests. 7 | 8 | ### Pipeline stages 9 | The basic stages of the pipeline include: 10 | 11 | 1. File upload to Azure blob storage. 12 | 1. Text extraction - converting the document format into raw text to be indexed. 13 | 1. Text chunking - breaking the text into reasonable size chunks for LLMs to process. 14 | 1. Text embedding - using an LLM to produce a vector embedding of the semantics of each text chunk. 15 | 1. Text storage - storing each text chunk along with its embedding in an Azure Cosmos DB container configured to perform efficient vector (and eventually Full-text) searches. 16 | 17 | ![pipeline](images/pipeline.png "Pipeline") 18 | 19 | ### Technology choices 20 | Currently this proof of concept uses: 21 | * Azure Blob storage for upload of documents. 22 | * Azure Functions to process the pipeline. 23 | * Azure Application Insights for logging. 24 | * Azure KeyVault to store secrets.
25 | * Azure Managed Identity to connect resources. 26 | * Azure AI Document Intelligence for text extraction using the `prebuilt-layout` model. 27 | * Fixed size, non-overlapping text chunking. 28 | * The `text-embedding-3-large` embedding model from Azure OpenAI for embedding. 29 | * Cosmos DB's `DiskANN` index for the resulting vectors. 30 | 31 | ## Setup 32 | 33 | ### Prerequisites 34 | * An Azure subscription with access to Azure OpenAI. 35 | * The Azure CLI installed. 36 | * A Powershell prompt. 37 | * Download the Azure Functions zip file* from the [latest release](https://github.com/Azure/document-vector-pipeline/releases) (or build your own from this repo with `dotnet publish -c Release`, and then zip the resulting publish directory) 38 | 39 | ### Steps 40 | 1. Create a Resource Group in your Azure subscription in the region where you want your resources deployed. Ensure it's a region that supports all of the above Azure Resource types. Examples include `West US`, `East US`, and `East US2`. 41 | 42 | 1. Set variables and subscription: 43 | ```powershell 44 | $sub = "" 45 | $rg = "" 46 | az account set --subscription $sub 47 | ``` 48 | 49 | 1. Deploy initial set of resources 50 | ```powershell 51 | az deployment group create --name 'deploy1' --resource-group $rg --template-file 'main.bicep' -p .\main.bicepparam 52 | ``` 53 | This step will likely take several minutes to complete - it will create all of the required Azure resources. 54 | 55 | NOTE: Some resource names must be globally unique. You can use a different base name for the created resources by altering the `baseName` variable in the `main.bicepparam` file. If you do, make note of the new names for some of the steps below. 56 | 57 | 1. Enable Vector search in your Azure Cosmos DB account 58 | 59 | 1. Navigate to the created Cosmos DB account in the Azure Portal. 60 | 1. Click on the `Settings\Features` blade. 61 | 1. Click on the `Vector Search for NoSQL API (preview)` feature, and then click `Enable`.
Note: It can take up to 15 minutes to complete the enabling of this feature, and may cause errors during processing of documents during that time. We are currently investigating this issue. 62 | 1. See the image below: 63 | 64 | ![screenshot](images/enable-vector-search.png "Enable vector search") 65 | 66 | 1. Create Database and Container 67 | 68 | 1. In the account above, navigate to Explorer and Create a new container. 69 | 1. For the Database, create a new database named `semantic_search_db`. 70 | 1. For the Container, name it `doc_search_container`. 71 | 1. Set the Partition Key to `/document_url`. 72 | 1. Create a new Vector embedding with: 73 | 1. Path: `/embedding` 74 | 1. Datatype: float32 75 | 1. Distance Function: cosine 76 | 1. Dimensions: 1536 77 | 1. Index type: quantizedFlat 78 | 79 | 1. Deploy the functions app code 80 | Set variables for the path to the zip file you downloaded in the prerequisites, and for the name of the functions app. By default, that will be `docingfuncapp` as below. 81 | ```powershell 82 | $zippedPath = "E:\deployment\functionapp.zip" 83 | $funappname = 'docingfuncapp' 84 | echo "---> Deploying Function Code" 85 | az functionapp deployment source config-zip -g $rg -n $funappname --src $zippedPath 86 | ``` 87 | 88 | 1. Monitor traces 89 | ```powershell 90 | # Monitor traces and items in cosmosdb account. 91 | $funappname = 'functionapp' 92 | echo "---> Monitoring Function Code" 93 | func azure functionapp logstream $funappname 94 | ``` 95 | Note - `func` above comes from the Azure Functions tools. You can also view this log stream in the Azure Portal by navigating to the Azure Functions app created above, and clicking on the `Monitoring\Log Stream` blade. 96 | 97 | 1. Upload documents to Azure blob storage account 98 | 1. Navigate to the storage account created above (`docingblobacc` by default). 99 | 1. Click on the `Storage Browser` blade 100 | 1. Click on `Blob containers` and then the `documents` folder. 101 | 1.
Click the `Upload` button in the toolbar, and then drag or browse to a document. 102 | 1. Check the event stream, and your Cosmos DB account. The document should be processed and ingested into the `doc_search_container` container of the `semantic_search_db` database in this account. 103 | 104 | 1. Query data 105 | 106 | Build an intelligent, context-aware application using the searchable data in your Cosmos DB account. See the [documentation](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search) for details. 107 | 108 | Good luck! 109 | 110 | 111 | ## Contributing 112 | 113 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 114 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 115 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 116 | 117 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 118 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 119 | provided by the bot. You will only need to do this once across all repos using our CLA. 120 | 121 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 122 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 123 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 124 | 125 | ## Trademarks 126 | 127 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 128 | trademarks or logos is subject to and must follow 129 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 
130 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 131 | Any use of third-party trademarks or logos are subject to those third-party's policies. 132 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/azure.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json 2 | 3 | name: document-vector-pipeline 4 | metadata: 5 | template: azd-init@1.9.5 6 | services: 7 | DocumentVectorPipelineFunctions: 8 | project: DocumentVectorPipelineFunctions 9 | host: function 10 | language: dotnet 11 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/cosmosdb.bicep: -------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | param capabilities array = [ 3 | { name: 'EnableServerless' } 4 | { name: 'EnableNoSQLVectorSearch' /*TODO: This doesn't seem to work on account creation.*/ } 5 | ] 6 | 7 | // Input parameters 8 | param databaseName string 9 | param name string 10 | param tags object 11 | 12 | // Create cosmosdb account 13 | resource cosmosDB 'Microsoft.DocumentDB/databaseAccounts@2024-05-15' = { 14 | name: name 15 | location: location 16 | kind: 'GlobalDocumentDB' 17 | properties: { 18 | consistencyPolicy: { 19 | defaultConsistencyLevel: 'Session' 20 | } 21 | databaseAccountOfferType: 'Standard' 22 | locations: [ 23 | { 24 | failoverPriority: 0 25 | isZoneRedundant: false 26 | locationName: location 27 | } 28 | ] 29 | capabilities: capabilities 30 | } 31 | tags: tags 32 | } 33 | 34 | // Assign user identity permissions to storage account 35 | param managedIdentityName string 36 | resource managedIdentity 
'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 37 | name: managedIdentityName 38 | } 39 | 40 | // Create database 41 | resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2024-05-15' = { 42 | parent: cosmosDB 43 | name: databaseName 44 | properties: { 45 | resource: { 46 | id: databaseName 47 | } 48 | } 49 | tags: tags 50 | } 51 | 52 | param id_role string = '00000000-0000-0000-0000-000000000002' // Built-in data contributor 53 | resource roleAssignmentSqlCosmosDB 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2021-10-15' = { 54 | name: guid(resourceGroup().id, '${name}-datacontributorrole', id_role) 55 | parent: cosmosDB 56 | properties: { 57 | principalId: managedIdentity.properties.principalId 58 | roleDefinitionId: resourceId('Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions', name, id_role) 59 | scope: cosmosDB.id 60 | } 61 | } 62 | 63 | output CosmosDBAccountName string = cosmosDB.name 64 | output CosmosDBEndpoint string = cosmosDB.properties.documentEndpoint 65 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/cosmosdb.bicepparam: -------------------------------------------------------------------------------- 1 | using './cosmosdb.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param name = 'docingcosmosacc' 5 | param databaseName = 'semantic_search_db' 6 | 7 | param capabilities = [ 8 | { name: 'EnableServerless' } 9 | { name: 'EnableNoSQLVectorSearch' } 10 | ] 11 | param tags = {} 12 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/documentintelligence.bicep: -------------------------------------------------------------------------------- 1 | @description('Location to deploy the resource. 
Defaults to the location of the resource group.') 2 | param location string = resourceGroup().location 3 | 4 | // Input parameters 5 | param name string 6 | param tags object 7 | param sku object 8 | param publicNetworkAccess string 9 | param disableLocalAuth bool 10 | 11 | // Look up the existing user-assigned managed identity for the Document Intelligence account 12 | param managedIdentityName string 13 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 14 | name: managedIdentityName 15 | } 16 | 17 | // Create document intelligence resource 18 | resource documentIntelligence 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' = { 19 | name: name 20 | location: location 21 | tags: tags 22 | kind: 'FormRecognizer' 23 | properties: { 24 | customSubDomainName: name 25 | disableLocalAuth: disableLocalAuth 26 | publicNetworkAccess: publicNetworkAccess 27 | networkAcls: { 28 | defaultAction: 'Allow' 29 | ipRules: [] 30 | virtualNetworkRules: [] 31 | } 32 | } 33 | identity: { 34 | type: 'UserAssigned' 35 | userAssignedIdentities: { 36 | '${managedIdentity.id}': {} 37 | } 38 | } 39 | sku: sku 40 | } 41 | 42 | param storage_account_id_roles array = ['a97b65f3-24c7-4388-baec-2e87135dc908'] // Cognitive Services User 43 | resource roleAssignmentDocumentIntelligence 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 44 | for id_role in storage_account_id_roles: { 45 | name: guid(resourceGroup().id, '${documentIntelligence.name}-storagerole', id_role) 46 | scope: documentIntelligence 47 | properties: { 48 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 49 | principalId: managedIdentity.properties.principalId 50 | } 51 | } 52 | ] 53 | 54 | // Output parameters 55 | @description('Name for the deployed Document Intelligence resource.') 56 | output DocumentIntelligenceName string = documentIntelligence.name 57 | 58 | @description('Endpoint for the deployed Document Intelligence resource.') 59 | output
DocumentIntelligenceEndpoint string = documentIntelligence.properties.endpoint 60 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/documentintelligence.bicepparam: -------------------------------------------------------------------------------- 1 | using './documentintelligence.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param name = 'docingdocintl' 5 | 6 | param tags = {} 7 | param sku = { 8 | name: 'S0' 9 | } 10 | param publicNetworkAccess = 'Enabled' 11 | param disableLocalAuth = false 12 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/functionapp.bicep: -------------------------------------------------------------------------------- 1 | var location = resourceGroup().location 2 | 3 | // Input params 4 | param funcAppStorageAccountName string 5 | param funcAppStorageSkuName string 6 | param appInsightsName string 7 | param appServicePlanName string 8 | param functionAppName string 9 | param logAnalyticsName string 10 | param managedIdentityName string 11 | param cosmosdbAccountName string 12 | param diAccountName string 13 | param openAIAccountName string 14 | param storageAccountName string 15 | param modelDeployment string 16 | param modelDimensions string 17 | param maxTokensPerChunk string 18 | param overlapTokens string 19 | 20 | // Get existing managed identity resource 21 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 22 | name: managedIdentityName 23 | } 24 | 25 | resource cosmosDB 'Microsoft.DocumentDB/databaseAccounts@2024-05-15' existing = { 26 | name: cosmosdbAccountName 27 | } 28 | 29 | resource documentIntelligence 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' existing = { 30 | name: diAccountName 31 | } 32 | 33 | resource openAi 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' existing = { 34 | name: 
openAIAccountName 35 | } 36 | 37 | resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' existing = { 38 | name: storageAccountName 39 | } 40 | var storageConnectionStringValue = 'DefaultEndpointsProtocol=https;AccountName=${storageAccount.name};EndpointSuffix=${environment().suffixes.storage};AccountKey=${storageAccount.listKeys().keys[0].value}' 41 | 42 | // Create webapps storage account to hold webapps related resources 43 | resource func_app_storage_account 'Microsoft.Storage/storageAccounts@2023-05-01' = { 44 | name: funcAppStorageAccountName 45 | location: location 46 | sku: { 47 | name: funcAppStorageSkuName 48 | } 49 | kind: 'StorageV2' 50 | properties: { 51 | accessTier: 'Hot' 52 | } 53 | resource blobService 'blobServices' = { 54 | name: 'default' 55 | } 56 | } 57 | var funcAppStorageConnectionStringValue = 'DefaultEndpointsProtocol=https;AccountName=${func_app_storage_account.name};EndpointSuffix=${environment().suffixes.storage};AccountKey=${func_app_storage_account.listKeys().keys[0].value}' 58 | 59 | // Assign storage account contributor role to func_app_storage_account 60 | param storage_account_id_roles array = ['ba92f5b4-2d11-453d-a403-e96b0029c9fe'] // Storage blob data contributor 61 | resource roleAssignmentFuncStorageAccount 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = [ 62 | for id_role in storage_account_id_roles: { 63 | name: guid(resourceGroup().id, '${func_app_storage_account.name}-webjobsrole', id_role) 64 | scope: func_app_storage_account 65 | properties: { 66 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 67 | principalId: managedIdentity.properties.principalId 68 | } 69 | } 70 | ] 71 | 72 | // Create a new Log Analytics workspace to back the Azure Application Insights instance 73 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2023-09-01' = { 74 | name: logAnalyticsName 75 | location: location 76 | properties: { 77 | sku: { 78 | name: 
'PerGB2018' 79 | } 80 | retentionInDays: 30 81 | features: { 82 | enableLogAccessUsingOnlyResourcePermissions: true 83 | } 84 | workspaceCapping: { 85 | dailyQuotaGb: 1 86 | } 87 | publicNetworkAccessForIngestion: 'Enabled' 88 | publicNetworkAccessForQuery: 'Enabled' 89 | } 90 | } 91 | 92 | // Application Insights instance 93 | resource appInsights 'Microsoft.Insights/components@2020-02-02' = { 94 | name: appInsightsName 95 | location: location 96 | kind: 'web' 97 | properties: { 98 | Application_Type: 'web' 99 | publicNetworkAccessForIngestion: 'Enabled' 100 | publicNetworkAccessForQuery: 'Enabled' 101 | WorkspaceResourceId: logAnalytics.id 102 | } 103 | } 104 | 105 | // Web server farm 106 | resource appservice_plan 'Microsoft.Web/serverfarms@2023-12-01' = { 107 | name: appServicePlanName 108 | location: location 109 | kind: 'functionapp' 110 | sku: { 111 | name: 'Y1' 112 | } 113 | properties: {} 114 | } 115 | 116 | // Deploy the Azure Function app with application 117 | resource funcApp 'Microsoft.Web/sites@2023-12-01' = { 118 | name: functionAppName 119 | location: location 120 | kind: 'functionapp' 121 | identity: { 122 | type: 'SystemAssigned, UserAssigned' 123 | userAssignedIdentities: { 124 | '${managedIdentity.id}': {} 125 | } 126 | } 127 | properties: { 128 | httpsOnly: true 129 | serverFarmId: appservice_plan.id 130 | keyVaultReferenceIdentity: managedIdentity.id 131 | enabled: true 132 | siteConfig: { 133 | appSettings: [ 134 | { 135 | name: 'AzureWebJobsStorage' 136 | value: funcAppStorageConnectionStringValue 137 | } 138 | // TODO(amisi) - directly hookup managed identity with blob trigger 139 | // { 140 | // name: 'AzureWebJobsStorage__accountName' 141 | // value: funcAppStorageAccountName 142 | // } 143 | // { 144 | // name: 'AzureWebJobsStorage__credential' 145 | // value: 'managedIdentity' 146 | // } 147 | // { 148 | // name: 'AzureWebJobsStorage__clientId' 149 | // value: managedIdentity.properties.clientId 150 | // } 151 | { 152 | name: 
'WEBSITE_CONTENTAZUREFILECONNECTIONSTRING' 153 | value: funcAppStorageConnectionStringValue 154 | } 155 | { 156 | name: 'APPINSIGHTS_INSTRUMENTATIONKEY' 157 | value: appInsights.properties.InstrumentationKey 158 | } 159 | { 160 | name: 'WEBSITE_RUN_FROM_PACKAGE' 161 | value: '1' 162 | } 163 | { 164 | name: 'FUNCTIONS_WORKER_RUNTIME' 165 | value: 'dotnet-isolated' 166 | } 167 | { 168 | name: 'netFrameworkVersion' 169 | value: 'v8.0' 170 | } 171 | { 172 | name: 'FUNCTIONS_EXTENSION_VERSION' 173 | value: '~4' 174 | } 175 | { 176 | name: 'AzureBlobStorageAccConnectionString' 177 | value: storageConnectionStringValue 178 | } 179 | // TODO(amisi) - directly hookup managed identity with blob trigger 180 | // { 181 | // name: 'AzureBlobStorageAccConnectionString__serviceUri' 182 | // value: storageAccount.properties.primaryEndpoints.blob 183 | // } 184 | // { 185 | // name: 'AzureBlobStorageAccConnectionString__credential' 186 | // value: 'managedIdentity' 187 | // } 188 | // { 189 | // name: 'AzureBlobStorageAccConnectionString__clientId' 190 | // value: managedIdentity.properties.clientId 191 | // } 192 | { 193 | name: 'AzureManagedIdentityClientId' 194 | value: managedIdentity.properties.clientId 195 | } 196 | { 197 | name: 'AzureCosmosDBConnectionString' 198 | value: cosmosDB.properties.documentEndpoint 199 | } 200 | { 201 | name: 'AzureDocumentIntelligenceConnectionString' 202 | value: documentIntelligence.properties.endpoint 203 | } 204 | { 205 | name: 'AzureOpenAIConnectionString' 206 | value: openAi.properties.endpoint 207 | } 208 | { 209 | name: 'AzureOpenAIModelDeployment' 210 | value: modelDeployment 211 | } 212 | { 213 | name: 'AzureOpenAIModelDimensions' 214 | value: modelDimensions 215 | } 216 | { 217 | name: 'MaxTokensPerChunk' 218 | value: maxTokensPerChunk 219 | } 220 | { 221 | name: 'OverlapTokens' 222 | value: overlapTokens 223 | } 224 | { 225 | name: 'AzureFunctionsJobHost__functionTimeout' 226 | value: '00:10:00' 227 | } 228 | ] 229 | } 230 | } 231 | } 
232 | 233 | // Assign the Contributor role on the function app to the user-assigned managed identity 234 | param id_roles_arr array = ['b24988ac-6180-42a0-ab88-20f7382dd24c'] // Contributor (privileged access) 235 | resource roleAssignmentFunctionApp 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 236 | for id_role in id_roles_arr: { 237 | name: guid(resourceGroup().id, '${func_app_storage_account.name}-funcrole', id_role) 238 | scope: funcApp 239 | properties: { 240 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 241 | principalId: managedIdentity.properties.principalId 242 | } 243 | } 244 | ] 245 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/functionapp.bicepparam: -------------------------------------------------------------------------------- 1 | using './functionapp.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param functionAppName = 'docingfunc' 5 | param cosmosdbAccountName = 'docingcosmosacc' 6 | param diAccountName = 'docingdocintl' 7 | param openAIAccountName = 'docingopenaiacc' 8 | param storageAccountName = 'docingblobacc' 9 | 10 | param funcAppStorageSkuName = 'Standard_LRS' 11 | param funcAppStorageAccountName = '${functionAppName}store' 12 | param appInsightsName = '${functionAppName}insight' 13 | param appServicePlanName = '${functionAppName}service' 14 | param logAnalyticsName = '${functionAppName}log' 15 | 16 | param modelDeployment = 'text-embedding-3-large' 17 | param modelDimensions = '1536' 18 | 19 | param maxTokensPerChunk = '250' 20 | param overlapTokens = '0' 21 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/main.bicep: -------------------------------------------------------------------------------- 1 | // Resource params 2 | param tags object = {} 3 | 4 | // Managed identity params 5 | param managedIdentity_name string 6 | 7 | // Storage params 8 |
param storage_name string 9 | param storage_containers array = [] 10 | 11 | // Function app params 12 | param function_app_name string 13 | param function_app_storageSkuName string 14 | 15 | param function_app_storageAccountName string = '${function_app_name}store' 16 | param function_app_appInsightsName string = '${function_app_name}insight' 17 | param function_app_logAnalyticsName string = '${function_app_name}log' 18 | param function_app_appServicePlanName string = '${function_app_name}service' 19 | 20 | param maxTokensPerChunk string 21 | param overlapTokens string 22 | 23 | // CosmosDB params 24 | param cosmosdb_capabilities array 25 | param cosmosdb_databaseName string 26 | param cosmosdb_name string 27 | 28 | // Open AI params 29 | param open_ai_deployments array 30 | param open_ai_name string 31 | param open_ai_sku string 32 | param open_ai_kind string 33 | param open_ai_format string 34 | param open_ai_publicNetworkAccess string 35 | param modelDeployment string 36 | param modelDimensions string 37 | 38 | // Document intelligence params 39 | param document_intelligence_name string 40 | param document_intelligence_sku object 41 | param document_intelligence_publicNetworkAccess string 42 | param document_intelligence_disableLocalAuth bool 43 | 44 | // User managed identity resource 45 | module userManagedIdentity_deployment 'userIdentity.bicep' = { 46 | name: 'userManagedIdentity_deployment' 47 | params: { 48 | managedIdentityName: managedIdentity_name 49 | } 50 | } 51 | 52 | // Storage resource 53 | module storage_deployment 'storage.bicep' = { 54 | name: 'storage_deployment' 55 | params: { 56 | name: storage_name 57 | containers: storage_containers 58 | tags: tags 59 | managedIdentityName: managedIdentity_name 60 | } 61 | dependsOn: [ 62 | userManagedIdentity_deployment 63 | ] 64 | } 65 | 66 | // CosmosDB resource 67 | module cosmosdb_deployment 'cosmosdb.bicep' = { 68 | name: 'cosmosdb_deployment' 69 | params: { 70 | managedIdentityName: 
managedIdentity_name 71 | capabilities: cosmosdb_capabilities 72 | databaseName: cosmosdb_databaseName 73 | name: cosmosdb_name 74 | tags: tags 75 | } 76 | dependsOn: [ 77 | userManagedIdentity_deployment 78 | storage_deployment 79 | ] 80 | } 81 | 82 | // Document Intelligence resource 83 | module document_intelligence_deployment 'documentintelligence.bicep' = { 84 | name: 'document_intelligence_deployment' 85 | params: { 86 | name: document_intelligence_name 87 | managedIdentityName: managedIdentity_name 88 | sku: document_intelligence_sku 89 | publicNetworkAccess: document_intelligence_publicNetworkAccess 90 | disableLocalAuth: document_intelligence_disableLocalAuth 91 | tags: tags 92 | } 93 | dependsOn: [ 94 | userManagedIdentity_deployment 95 | storage_deployment 96 | ] 97 | } 98 | 99 | // OpenAI Resource 100 | module open_ai_deployment 'openai.bicep' = { 101 | name: 'open_ai_deployment' 102 | params: { 103 | deployments: open_ai_deployments 104 | managedIdentityName: managedIdentity_name 105 | name: open_ai_name 106 | format: open_ai_format 107 | kind: open_ai_kind 108 | sku: open_ai_sku 109 | publicNetworkAccess: open_ai_publicNetworkAccess 110 | tags: tags 111 | } 112 | dependsOn: [ 113 | userManagedIdentity_deployment 114 | ] 115 | } 116 | 117 | // Function App Resource 118 | module function_app_deployment 'functionapp.bicep' = { 119 | name: 'function_app_deployment' 120 | params: { 121 | managedIdentityName: managedIdentity_name 122 | functionAppName: function_app_name 123 | funcAppStorageSkuName: function_app_storageSkuName 124 | funcAppStorageAccountName: function_app_storageAccountName 125 | appInsightsName: function_app_appInsightsName 126 | appServicePlanName: function_app_appServicePlanName 127 | logAnalyticsName: function_app_logAnalyticsName 128 | cosmosdbAccountName: cosmosdb_name 129 | diAccountName: document_intelligence_name 130 | openAIAccountName: open_ai_name 131 | storageAccountName: storage_name 132 | modelDeployment: modelDeployment 133 | 
modelDimensions: modelDimensions 134 | maxTokensPerChunk: maxTokensPerChunk 135 | overlapTokens: overlapTokens 136 | } 137 | dependsOn: [ 138 | userManagedIdentity_deployment 139 | storage_deployment 140 | open_ai_deployment 141 | document_intelligence_deployment 142 | cosmosdb_deployment 143 | ] 144 | } 145 | 146 | // Output params 147 | // User Managed Identity and KeyVault Output Params 148 | output AZURE_USER_MANAGED_IDENTITY_NAME string = userManagedIdentity_deployment.outputs.AzureManagedIdentityName 149 | output AZURE_USER_MANAGED_IDENTITY_ID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityId 150 | output AZURE_USER_MANAGED_IDENTITY_CLIENTID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityClientId 151 | output AZURE_USER_MANAGED_IDENTITY_PRINCIPALID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityPrincipalId 152 | output AZURE_USER_MANAGED_IDENTITY_TENANTID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityTenantId 153 | 154 | // Storage Params 155 | output AZURE_BLOB_STORE_ACCOUNT_NAME string = storage_deployment.outputs.AzureBlobStorageAccountName 156 | output AZURE_BLOB_STORE_ACCOUNT_ENDPOINT string = storage_deployment.outputs.AzureBlobStorageAccountEndpoint 157 | 158 | // CosmosDB Params 159 | output AZURE_COSMOS_DB_ACCOUNT_NAME string = cosmosdb_deployment.outputs.CosmosDBAccountName 160 | output AZURE_COSMOS_DB_ENDPOINT string = cosmosdb_deployment.outputs.CosmosDBEndpoint 161 | 162 | // Document Intelligence Params 163 | output AZURE_DOCUMENT_INTELLIGENCE_NAME string = document_intelligence_deployment.outputs.DocumentIntelligenceName 164 | output AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT string = document_intelligence_deployment.outputs.DocumentIntelligenceEndpoint 165 | 166 | // OpenAI 167 | output AZURE_OPEN_AI_SERVICE_NAME string = open_ai_deployment.outputs.openAIServiceName 168 | output AZURE_OPEN_AI_SERVICE_ENDPOINT string = open_ai_deployment.outputs.openAIServiceEndpoint 
169 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/main.bicepparam: -------------------------------------------------------------------------------- 1 | using './main.bicep' 2 | 3 | var baseName = 'docing' 4 | 5 | // Naming params 6 | param managedIdentity_name = '${baseName}useridentity' 7 | param storage_name = '${baseName}blobacc' 8 | param function_app_name = '${baseName}funcapp' 9 | param cosmosdb_name = '${baseName}cosmosacc' 10 | param document_intelligence_name = '${baseName}docintl' 11 | param open_ai_name = '${baseName}openai' 12 | 13 | // Common params 14 | param tags = {} 15 | 16 | // Storage params 17 | param storage_containers = [ 18 | { 19 | name: 'documents' 20 | } 21 | ] 22 | 23 | // Function app params 24 | param function_app_storageSkuName = 'Standard_LRS' 25 | 26 | param maxTokensPerChunk = '250' 27 | param overlapTokens = '0' 28 | 29 | // CosmosDB params 30 | param cosmosdb_databaseName = 'semantic_search_db' 31 | param cosmosdb_capabilities = [ 32 | { name: 'EnableServerless' } 33 | { name: 'EnableNoSQLVectorSearch' } 34 | ] 35 | 36 | // Document Intelligence Params 37 | param document_intelligence_sku = { 38 | name: 'S0' 39 | } 40 | param document_intelligence_publicNetworkAccess = 'Enabled' 41 | param document_intelligence_disableLocalAuth = false 42 | 43 | // Open AI params 44 | param modelDeployment = 'text-embedding-3-large' 45 | param modelDimensions = '1536' 46 | param open_ai_deployments = [ 47 | { 48 | name: modelDeployment 49 | sku: { 50 | name: 'Standard' 51 | capacity: 100 52 | } 53 | model: { 54 | name: modelDeployment 55 | version: '1' 56 | } 57 | } 58 | ] 59 | param open_ai_sku = 'S0' 60 | param open_ai_kind = 'OpenAI' 61 | param open_ai_format = 'OpenAI' 62 | param open_ai_publicNetworkAccess = 'Enabled' 63 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/openai.bicep: 
-------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | 3 | // Input parameters 4 | param deployments array 5 | param name string 6 | param sku string 7 | param tags object 8 | param kind string 9 | param format string 10 | param publicNetworkAccess string 11 | 12 | // Create openAI resource 13 | resource openAi 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' = { 14 | name: name 15 | location: location 16 | sku: { 17 | name: sku 18 | } 19 | kind: kind 20 | properties: { 21 | customSubDomainName: name 22 | publicNetworkAccess: publicNetworkAccess 23 | } 24 | tags: tags 25 | identity: { 26 | type: 'UserAssigned' 27 | userAssignedIdentities: { 28 | '${managedIdentity.id}': {} 29 | } 30 | } 31 | } 32 | 33 | @batchSize(1) 34 | resource openAiDeployments 'Microsoft.CognitiveServices/accounts/deployments@2024-04-01-preview' = [ 35 | for deployment in deployments: { 36 | parent: openAi 37 | name: deployment.name 38 | sku: { 39 | capacity: deployment.sku.capacity 40 | name: deployment.sku.name 41 | } 42 | properties: { 43 | model: { 44 | format: format 45 | name: deployment.model.name 46 | version: deployment.model.version 47 | } 48 | } 49 | } 50 | ] 51 | 52 | // Assign user managed identity to openai app. 
53 | param managedIdentityName string 54 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 55 | name: managedIdentityName 56 | } 57 | param storage_account_id_roles array = [ 58 | 'a97b65f3-24c7-4388-baec-2e87135dc908' //Cognitive Services User 59 | ] 60 | // Grant each role ID above to the managed identity, scoped to the OpenAI account (the parameter keeps its storage_ prefix for compatibility) 61 | resource roleAssignmentOpenAIAccount 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 62 | for id_role in storage_account_id_roles: { 63 | name: guid(resourceGroup().id, '${name}-openairole', id_role) 64 | scope: openAi 65 | properties: { 66 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 67 | principalId: managedIdentity.properties.principalId 68 | principalType: 'ServicePrincipal' // stated explicitly, as storage.bicep does, so assigning to a newly created identity does not fail on replication delay 69 | } 70 | } 71 | ] 72 | 73 | output openAIServiceName string = openAi.name 74 | output openAIServiceEndpoint string = openAi.properties.endpoint 75 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/openai.bicepparam: -------------------------------------------------------------------------------- 1 | using './openai.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param name = 'docingopenaiacc' 5 | 6 | param deployments = [ 7 | { 8 | name: 'text-embedding-3-large' 9 | sku: { 10 | name: 'Standard' 11 | capacity: 40 12 | } 13 | model: { 14 | name: 'text-embedding-3-large' 15 | version: '1' 16 | } 17 | } 18 | ] 19 | param sku = 'S0' 20 | param kind = 'OpenAI' 21 | param format = 'OpenAI' 22 | param publicNetworkAccess = 'Enabled' 23 | param tags = {} 24 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/storage.bicep: -------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | 3 | // Input parameters 4 | param name string 5 | param tags object 6 | param containers array = [] 7 | 8 | // Create storage account 9 | resource storage 
'Microsoft.Storage/storageAccounts@2023-05-01' = { 10 | name: name 11 | location: location 12 | kind: 'StorageV2' 13 | sku: { 14 | name: 'Standard_LRS' 15 | } 16 | tags: tags 17 | } 18 | 19 | // Create the default blob service and one container per entry in containers 20 | resource blobService 'Microsoft.Storage/storageAccounts/blobServices@2023-05-01' = { 21 | parent: storage 22 | name: 'default' 23 | } 24 | 25 | resource blobContainers 'Microsoft.Storage/storageAccounts/blobServices/containers@2023-05-01' = [ 26 | for container in containers: { 27 | parent: blobService 28 | name: container.name 29 | } 30 | ] 31 | 32 | // Assign user identity permissions to storage account (the role assignments below are scoped to the blob service) 33 | param managedIdentityName string 34 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 35 | name: managedIdentityName 36 | } 37 | 38 | param storage_account_id_roles array = ['2a2b9908-6ea1-4ae2-8e65-a410df84e7d1'] // Storage blob data reader 39 | resource roleAssignmentStorageAccount 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 40 | for id_role in storage_account_id_roles: { 41 | name: guid(resourceGroup().id, '${storage.name}-storagerole', id_role) 42 | scope: blobService 43 | properties: { 44 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 45 | principalId: managedIdentity.properties.principalId 46 | principalType: 'ServicePrincipal' 47 | } 48 | } 49 | ] 50 | 51 | // Output storage account name and blob endpoint 52 | output AzureBlobStorageAccountName string = storage.name 53 | output AzureBlobStorageAccountEndpoint string = storage.properties.primaryEndpoints.blob 54 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/storage.bicepparam: -------------------------------------------------------------------------------- 1 | using './storage.bicep' 2 | 3 | param name = 'docingblobacc' 4 | param managedIdentityName = 'docinguseridentity' 5 | param 
containers = [ 6 | { 7 | name: 'documents' 8 | } 9 | ] 10 | 11 | param tags = {} 12 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/userIdentity.bicep: -------------------------------------------------------------------------------- 1 | param managedIdentityName string 2 | param location string = resourceGroup().location 3 | // Create the user-assigned managed identity in the given location 4 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' = { 5 | name: managedIdentityName 6 | location: location 7 | } 8 | // Expose the identity's resource ID, name, client ID, principal ID and tenant ID 9 | output AzureManagedIdentityId string = managedIdentity.id 10 | output AzureManagedIdentityName string = managedIdentity.name 11 | output AzureManagedIdentityClientId string = managedIdentity.properties.clientId 12 | output AzureManagedIdentityPrincipalId string = managedIdentity.properties.principalId 13 | output AzureManagedIdentityTenantId string = managedIdentity.properties.tenantId 14 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/deployment/userIdentity.bicepparam: -------------------------------------------------------------------------------- 1 | using './userIdentity.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/images/enable-vector-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/document-vector-pipeline/2b51566423b7c3a20d57a3e82f4739d1830edede/AzureCosmosDB/csharp/images/enable-vector-search.png -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/images/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure/document-vector-pipeline/2b51566423b7c3a20d57a3e82f4739d1830edede/AzureCosmosDB/csharp/images/pipeline.png -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/abbreviations.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysisServicesServers": "as", 3 | "apiManagementService": "apim-", 4 | "appConfigurationConfigurationStores": "appcs-", 5 | "appManagedEnvironments": "cae-", 6 | "appContainerApps": "ca-", 7 | "authorizationPolicyDefinitions": "policy-", 8 | "automationAutomationAccounts": "aa-", 9 | "blueprintBlueprints": "bp-", 10 | "blueprintBlueprintsArtifacts": "bpa-", 11 | "cacheRedis": "redis-", 12 | "cdnProfiles": "cdnp-", 13 | "cdnProfilesEndpoints": "cdne-", 14 | "cognitiveServicesAccounts": "cog-", 15 | "cognitiveServicesFormRecognizer": "cog-fr-", 16 | "cognitiveServicesTextAnalytics": "cog-ta-", 17 | "computeAvailabilitySets": "avail-", 18 | "computeCloudServices": "cld-", 19 | "computeDiskEncryptionSets": "des", 20 | "computeDisks": "disk", 21 | "computeDisksOs": "osdisk", 22 | "computeGalleries": "gal", 23 | "computeSnapshots": "snap-", 24 | "computeVirtualMachines": "vm", 25 | "computeVirtualMachineScaleSets": "vmss-", 26 | "containerInstanceContainerGroups": "ci", 27 | "containerRegistryRegistries": "cr", 28 | "containerServiceManagedClusters": "aks-", 29 | "databricksWorkspaces": "dbw-", 30 | "dataFactoryFactories": "adf-", 31 | "dataLakeAnalyticsAccounts": "dla", 32 | "dataLakeStoreAccounts": "dls", 33 | "dataMigrationServices": "dms-", 34 | "dBforMySQLServers": "mysql-", 35 | "dBforPostgreSQLServers": "psql-", 36 | "devicesIotHubs": "iot-", 37 | "devicesProvisioningServices": "provs-", 38 | "devicesProvisioningServicesCertificates": "pcert-", 39 | "documentDBDatabaseAccounts": "cosmos-", 40 | "eventGridDomains": "evgd-", 41 | "eventGridDomainsTopics": "evgt-", 42 | "eventGridEventSubscriptions": 
"evgs-", 43 | "eventHubNamespaces": "evhns-", 44 | "eventHubNamespacesEventHubs": "evh-", 45 | "hdInsightClustersHadoop": "hadoop-", 46 | "hdInsightClustersHbase": "hbase-", 47 | "hdInsightClustersKafka": "kafka-", 48 | "hdInsightClustersMl": "mls-", 49 | "hdInsightClustersSpark": "spark-", 50 | "hdInsightClustersStorm": "storm-", 51 | "hybridComputeMachines": "arcs-", 52 | "insightsActionGroups": "ag-", 53 | "insightsComponents": "appi-", 54 | "keyVaultVaults": "kv-", 55 | "kubernetesConnectedClusters": "arck", 56 | "kustoClusters": "dec", 57 | "kustoClustersDatabases": "dedb", 58 | "logicIntegrationAccounts": "ia-", 59 | "logicWorkflows": "logic-", 60 | "machineLearningServicesWorkspaces": "mlw-", 61 | "managedIdentityUserAssignedIdentities": "id-", 62 | "managementManagementGroups": "mg-", 63 | "migrateAssessmentProjects": "migr-", 64 | "networkApplicationGateways": "agw-", 65 | "networkApplicationSecurityGroups": "asg-", 66 | "networkAzureFirewalls": "afw-", 67 | "networkBastionHosts": "bas-", 68 | "networkConnections": "con-", 69 | "networkDnsZones": "dnsz-", 70 | "networkExpressRouteCircuits": "erc-", 71 | "networkFirewallPolicies": "afwp-", 72 | "networkFirewallPoliciesWebApplication": "waf", 73 | "networkFirewallPoliciesRuleGroups": "wafrg", 74 | "networkFrontDoors": "fd-", 75 | "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", 76 | "networkLoadBalancersExternal": "lbe-", 77 | "networkLoadBalancersInternal": "lbi-", 78 | "networkLoadBalancersInboundNatRules": "rule-", 79 | "networkLocalNetworkGateways": "lgw-", 80 | "networkNatGateways": "ng-", 81 | "networkNetworkInterfaces": "nic-", 82 | "networkNetworkSecurityGroups": "nsg-", 83 | "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", 84 | "networkNetworkWatchers": "nw-", 85 | "networkPrivateDnsZones": "pdnsz-", 86 | "networkPrivateLinkServices": "pl-", 87 | "networkPublicIPAddresses": "pip-", 88 | "networkPublicIPPrefixes": "ippre-", 89 | "networkRouteFilters": "rf-", 90 | "networkRouteTables": 
"rt-", 91 | "networkRouteTablesRoutes": "udr-", 92 | "networkTrafficManagerProfiles": "traf-", 93 | "networkVirtualNetworkGateways": "vgw-", 94 | "networkVirtualNetworks": "vnet-", 95 | "networkVirtualNetworksSubnets": "snet-", 96 | "networkVirtualNetworksVirtualNetworkPeerings": "peer-", 97 | "networkVirtualWans": "vwan-", 98 | "networkVpnGateways": "vpng-", 99 | "networkVpnGatewaysVpnConnections": "vcn-", 100 | "networkVpnGatewaysVpnSites": "vst-", 101 | "notificationHubsNamespaces": "ntfns-", 102 | "notificationHubsNamespacesNotificationHubs": "ntf-", 103 | "operationalInsightsWorkspaces": "log-", 104 | "portalDashboards": "dash-", 105 | "powerBIDedicatedCapacities": "pbi-", 106 | "purviewAccounts": "pview-", 107 | "recoveryServicesVaults": "rsv-", 108 | "resourcesResourceGroups": "rg-", 109 | "searchSearchServices": "srch-", 110 | "serviceBusNamespaces": "sb-", 111 | "serviceBusNamespacesQueues": "sbq-", 112 | "serviceBusNamespacesTopics": "sbt-", 113 | "serviceEndPointPolicies": "se-", 114 | "serviceFabricClusters": "sf-", 115 | "signalRServiceSignalR": "sigr", 116 | "sqlManagedInstances": "sqlmi-", 117 | "sqlServers": "sql-", 118 | "sqlServersDataWarehouse": "sqldw-", 119 | "sqlServersDatabases": "sqldb-", 120 | "sqlServersDatabasesStretch": "sqlstrdb-", 121 | "storageStorageAccounts": "st", 122 | "storageStorageAccountsVm": "stvm", 123 | "storSimpleManagers": "ssimp", 124 | "streamAnalyticsCluster": "asa-", 125 | "synapseWorkspaces": "syn", 126 | "synapseWorkspacesAnalyticsWorkspaces": "synw", 127 | "synapseWorkspacesSqlPoolsDedicated": "syndp", 128 | "synapseWorkspacesSqlPoolsSpark": "synsp", 129 | "timeSeriesInsightsEnvironments": "tsi-", 130 | "webServerFarms": "plan-", 131 | "webSitesAppService": "app-", 132 | "webSitesAppServiceEnvironment": "ase-", 133 | "webSitesFunctions": "func-", 134 | "webStaticSites": "stapp-" 135 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/app/processor.bicep: 
-------------------------------------------------------------------------------- 1 | // Parameters of the processor function app wrapper. 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | param applicationInsightsName string = '' 6 | param appServicePlanId string 7 | param appSettings object = {} 8 | param runtimeName string 9 | param runtimeVersion string 10 | param serviceName string = 'processor' 11 | param storageAccountName string 12 | param deploymentStorageContainerName string 13 | param virtualNetworkSubnetId string = '' 14 | param instanceMemoryMB int = 2048 15 | param maximumInstanceCount int = 100 16 | param identityId string = '' 17 | param identityClientId string = '' 18 | 19 | // Caller tags plus the azd service-name tag. 20 | var serviceTags = union(tags, { 'azd-service-name': serviceName }) 21 | 22 | // Identity-based settings layered over the caller's app settings. 23 | var identityAppSettings = { 24 | AzureWebJobsStorage__clientId: identityClientId 25 | APPLICATIONINSIGHTS_AUTHENTICATION_STRING: 'ClientId=${identityClientId};Authorization=AAD' 26 | } 27 | 28 | // Deploy the function app through the shared Flex Consumption module, always with a user-assigned identity. 29 | module processor '../core/host/functions-flexconsumption.bicep' = { 30 | name: '${serviceName}-functions-module' 31 | params: { 32 | name: name 33 | location: location 34 | tags: serviceTags 35 | runtimeName: runtimeName 36 | runtimeVersion: runtimeVersion 37 | appServicePlanId: appServicePlanId 38 | applicationInsightsName: applicationInsightsName 39 | storageAccountName: storageAccountName 40 | deploymentStorageContainerName: deploymentStorageContainerName 41 | virtualNetworkSubnetId: virtualNetworkSubnetId 42 | instanceMemoryMB: instanceMemoryMB 43 | maximumInstanceCount: maximumInstanceCount 44 | identityType: 'UserAssigned' 45 | identityId: identityId 46 | appSettings: union(appSettings, identityAppSettings) 47 | } 48 | } 49 | 50 | output SERVICE_PROCESSOR_NAME string = processor.outputs.name 51 | output SERVICE_API_IDENTITY_PRINCIPAL_ID string = processor.outputs.identityPrincipalId 52 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/app/storage-Access.bicep: 
-------------------------------------------------------------------------------- 1 | param principalID string 2 | param roleDefinitionID string 3 | param storageAccountName string 4 | 5 | resource storageAccount 'Microsoft.Storage/storageAccounts@2021-09-01' existing = { 6 | name: storageAccountName 7 | } 8 | 9 | // Allow access from API to storage account using a managed identity and least priv Storage roles. Role definitions are subscription-level resources, so the ID is built with subscriptionResourceId(). 10 | resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 11 | name: guid(storageAccount.id, principalID, roleDefinitionID) 12 | scope: storageAccount 13 | properties: { 14 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionID) 15 | principalId: principalID 16 | principalType: 'ServicePrincipal' // Workaround for https://learn.microsoft.com/en-us/azure/role-based-access-control/role-assignments-template#new-service-principal 17 | } 18 | } 19 | 20 | output ROLE_ASSIGNMENT_NAME string = storageRoleAssignment.name 21 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/app/storage-PrivateEndpoint.bicep: -------------------------------------------------------------------------------- 1 | // Parameters 2 | @description('Specifies the name of the virtual network.') 3 | param virtualNetworkName string 4 | 5 | @description('Specifies the name of the subnet in which the private endpoint is created.') 6 | param subnetName string 7 | 8 | @description('Specifies the resource name of the Storage resource with an endpoint.') 9 | param resourceName string 10 | 11 | @description('Specifies the location.') 12 | param location string = resourceGroup().location 13 | 14 | param tags object = {} 15 | 16 | // Virtual Network 17 | resource vnet 'Microsoft.Network/virtualNetworks@2021-08-01' existing = { 18 | name: virtualNetworkName 19 | } 20 | 21 | resource storageAccount 'Microsoft.Storage/storageAccounts@2021-09-01' existing = { 22 | name: 
resourceName 23 | } 24 | // Derived names for the private DNS zone and its virtual network link 25 | var blobPrivateDNSZoneName = format('privatelink.blob.{0}', environment().suffixes.storage) 26 | var blobPrivateDnsZoneVirtualNetworkLinkName = format('{0}-link-{1}', resourceName, take(toLower(uniqueString(resourceName, virtualNetworkName)), 4)) 27 | 28 | // Private DNS zone for blob private-link names (privatelink.blob.<storage suffix of the current cloud>) 29 | resource blobPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { 30 | name: blobPrivateDNSZoneName 31 | location: 'global' 32 | tags: tags 33 | properties: {} 34 | dependsOn: [ 35 | vnet 36 | ] 37 | } 38 | 39 | // Link the private DNS zone to the virtual network, with auto-registration disabled 40 | resource blobPrivateDnsZoneVirtualNetworkLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = { 41 | parent: blobPrivateDnsZone 42 | name: blobPrivateDnsZoneVirtualNetworkLinkName 43 | location: 'global' 44 | tags: tags 45 | properties: { 46 | registrationEnabled: false 47 | virtualNetwork: { 48 | id: vnet.id 49 | } 50 | } 51 | } 52 | 53 | // Private endpoint to the storage account's blob sub-resource, placed in the subnet named by subnetName 54 | resource blobPrivateEndpoint 'Microsoft.Network/privateEndpoints@2021-08-01' = { 55 | name: 'blob-private-endpoint' 56 | location: location 57 | tags: tags 58 | properties: { 59 | privateLinkServiceConnections: [ 60 | { 61 | name: 'blobPrivateLinkConnection' 62 | properties: { 63 | privateLinkServiceId: storageAccount.id 64 | groupIds: [ 65 | 'blob' 66 | ] 67 | } 68 | } 69 | ] 70 | subnet: { 71 | id: '${vnet.id}/subnets/${subnetName}' 72 | } 73 | } 74 | } 75 | // Attach the private DNS zone to the private endpoint through a DNS zone group 76 | resource blobPrivateDnsZoneGroupName 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = { 77 | parent: blobPrivateEndpoint 78 | name: 'blobPrivateDnsZoneGroup' 79 | properties: { 80 | privateDnsZoneConfigs: [ 81 | { 82 | name: 'storageBlobARecord' 83 | properties: { 84 | privateDnsZoneId: blobPrivateDnsZone.id 85 | } 86 | } 87 | ] 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/app/vnet.bicep: 
-------------------------------------------------------------------------------- 1 | @description('Specifies the name of the virtual network.') 2 | param vNetName string 3 | 4 | @description('Specifies the location.') 5 | param location string = resourceGroup().location 6 | 7 | @description('Specifies the name of the subnet for private endpoints.') 8 | param peSubnetName string = 'private-endpoints-subnet' 9 | 10 | @description('Specifies the name of the subnet for Function App virtual network integration.') 11 | param appSubnetName string = 'app' 12 | 13 | param tags object = {} 14 | // Virtual network 10.0.0.0/16 with a private-endpoint subnet (10.0.1.0/24) and a delegated app subnet (10.0.2.0/24); subnet IDs are derived from the name parameters so overridden names stay consistent 15 | resource virtualNetwork 'Microsoft.Network/virtualNetworks@2023-05-01' = { 16 | name: vNetName 17 | location: location 18 | tags: tags 19 | properties: { 20 | addressSpace: { 21 | addressPrefixes: [ 22 | '10.0.0.0/16' 23 | ] 24 | } 25 | encryption: { 26 | enabled: false 27 | enforcement: 'AllowUnencrypted' 28 | } 29 | subnets: [ 30 | { 31 | name: peSubnetName 32 | id: resourceId('Microsoft.Network/virtualNetworks/subnets', vNetName, peSubnetName) 33 | properties: { 34 | addressPrefixes: [ 35 | '10.0.1.0/24' 36 | ] 37 | delegations: [] 38 | privateEndpointNetworkPolicies: 'Disabled' 39 | privateLinkServiceNetworkPolicies: 'Enabled' 40 | } 41 | type: 'Microsoft.Network/virtualNetworks/subnets' 42 | } 43 | { 44 | name: appSubnetName 45 | id: resourceId('Microsoft.Network/virtualNetworks/subnets', vNetName, appSubnetName) 46 | properties: { 47 | addressPrefixes: [ 48 | '10.0.2.0/24' 49 | ] 50 | delegations: [ 51 | { 52 | name: 'delegation' 53 | id: resourceId('Microsoft.Network/virtualNetworks/subnets/delegations', vNetName, appSubnetName, 'delegation') 54 | properties: { 55 | //Microsoft.App/environments is the correct delegation for Flex Consumption VNet integration 56 | serviceName: 'Microsoft.App/environments' 57 | } 58 | type: 'Microsoft.Network/virtualNetworks/subnets/delegations' 59 | } 60 | ] 61 | privateEndpointNetworkPolicies: 'Disabled' 62 | 
privateLinkServiceNetworkPolicies: 'Enabled' 63 | } 64 | type: 'Microsoft.Network/virtualNetworks/subnets' 65 | } 66 | ] 67 | virtualNetworkPeerings: [] 68 | enableDdosProtection: false 69 | } 70 | } 71 | 72 | output peSubnetName string = virtualNetwork.properties.subnets[0].name 73 | output peSubnetID string = virtualNetwork.properties.subnets[0].id 74 | output appSubnetName string = virtualNetwork.properties.subnets[1].name 75 | output appSubnetID string = virtualNetwork.properties.subnets[1].id 76 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/host/appserviceplan.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param kind string = '' 6 | param reserved bool = true 7 | param sku object 8 | 9 | resource appServicePlan 'Microsoft.Web/serverfarms@2023-12-01' = { 10 | name: name 11 | location: location 12 | tags: tags 13 | sku: sku 14 | kind: kind 15 | properties: { 16 | reserved: reserved 17 | } 18 | } 19 | 20 | output id string = appServicePlan.id 21 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/host/functions-flexconsumption.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | // Reference Properties: existing resources the function app is wired to, plus its identity 6 | param applicationInsightsName string = '' 7 | param appServicePlanId string 8 | param storageAccountName string 9 | param virtualNetworkSubnetId string = '' 10 | @allowed(['SystemAssigned', 'UserAssigned']) 11 | param identityType string 12 | @description('Resource ID of the user assigned identity (used when identityType is UserAssigned)') 13 | param identityId string 14 | 15 | // Runtime Properties 16 | @allowed([ 17 | 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' 18 | 
]) 19 | param runtimeName string 20 | @allowed(['3.10', '3.11', '7.4', '8.0', '10', '11', '17', '20']) 21 | param runtimeVersion string 22 | param kind string = 'functionapp,linux' 23 | 24 | // Microsoft.Web/sites/config 25 | param appSettings object = {} 26 | param instanceMemoryMB int = 2048 27 | param maximumInstanceCount int = 100 28 | param deploymentStorageContainerName string 29 | 30 | resource stg 'Microsoft.Storage/storageAccounts@2022-09-01' existing = { 31 | name: storageAccountName 32 | } 33 | // Function app on the given plan; its deployment package is read from a blob container in stg using the app's managed identity 34 | resource functions 'Microsoft.Web/sites@2023-12-01' = { 35 | name: name 36 | location: location 37 | tags: tags 38 | kind: kind 39 | identity: { 40 | type: identityType 41 | userAssignedIdentities: identityType == 'UserAssigned' ? { 42 | '${identityId}': {} 43 | } : null // only send the identity map for UserAssigned; a SystemAssigned app has no identityId to reference 44 | } 45 | properties: { 46 | serverFarmId: appServicePlanId 47 | functionAppConfig: { 48 | deployment: { 49 | storage: { 50 | type: 'blobContainer' 51 | value: '${stg.properties.primaryEndpoints.blob}${deploymentStorageContainerName}' 52 | authentication: { 53 | type: identityType == 'SystemAssigned' ? 'SystemAssignedIdentity' : 'UserAssignedIdentity' 54 | userAssignedIdentityResourceId: identityType == 'UserAssigned' ? 
identityId : '' 55 | } 56 | } 57 | } 58 | scaleAndConcurrency: { 59 | instanceMemoryMB: instanceMemoryMB 60 | maximumInstanceCount: maximumInstanceCount 61 | } 62 | runtime: { 63 | name: runtimeName 64 | version: runtimeVersion 65 | } 66 | } 67 | virtualNetworkSubnetId: !empty(virtualNetworkSubnetId) ? virtualNetworkSubnetId : null // the parameter defaults to '', which is not a subnet ID 68 | } 69 | // App settings: caller-supplied values, then identity-based host storage, then the Application Insights connection string only when a component name was given 70 | resource configAppSettings 'config' = { 71 | name: 'appsettings' 72 | properties: union(appSettings, 73 | { 74 | AzureWebJobsStorage__accountName: stg.name 75 | AzureWebJobsStorage__credential : 'managedidentity' 76 | }, 77 | !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}) 78 | } 79 | } 80 | 81 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { 82 | name: applicationInsightsName 83 | } 84 | 85 | output name string = functions.name 86 | output uri string = 'https://${functions.properties.defaultHostName}' 87 | output identityPrincipalId string = identityType == 'SystemAssigned' ? 
functions.identity.principalId : '' 88 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/identity/userAssignedIdentity.bicep: -------------------------------------------------------------------------------- 1 | param identityName string 2 | param location string 3 | param tags object = {} 4 | 5 | resource userAssignedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' = { 6 | name: identityName 7 | location: location 8 | tags: tags 9 | } 10 | 11 | output identityId string = userAssignedIdentity.id 12 | output identityName string = userAssignedIdentity.name 13 | output identityPrincipalId string = userAssignedIdentity.properties.principalId 14 | output identityClientId string = userAssignedIdentity.properties.clientId 15 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/monitor/appinsights-access.bicep: -------------------------------------------------------------------------------- 1 | param principalID string 2 | param roleDefinitionID string 3 | param appInsightsName string 4 | 5 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = { 6 | name: appInsightsName 7 | } 8 | 9 | // Allow access from API to app insights using a managed identity and least priv role. Role definitions are subscription-level resources, so the ID is built with subscriptionResourceId(). 10 | resource appInsightsRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 11 | name: guid(applicationInsights.id, principalID, roleDefinitionID) 12 | scope: applicationInsights 13 | properties: { 14 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionID) 15 | principalId: principalID 16 | principalType: 'ServicePrincipal' // Workaround for https://learn.microsoft.com/en-us/azure/role-based-access-control/role-assignments-template#new-service-principal 17 | } 18 | } 19 | 20 | output ROLE_ASSIGNMENT_NAME string = appInsightsRoleAssignment.name 21 | 22 | 
-------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/monitor/applicationinsights.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param logAnalyticsWorkspaceId string 6 | param disableLocalAuth bool = false 7 | // Application Insights component of type 'web', linked to the given Log Analytics workspace 8 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 9 | name: name 10 | location: location 11 | tags: tags 12 | kind: 'web' 13 | properties: { 14 | Application_Type: 'web' 15 | WorkspaceResourceId: logAnalyticsWorkspaceId 16 | DisableLocalAuth: disableLocalAuth 17 | } 18 | } 19 | 20 | output connectionString string = applicationInsights.properties.ConnectionString 21 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey 22 | output name string = applicationInsights.name 23 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/monitor/loganalytics.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | // Log Analytics workspace with 30-day retention on the PerGB2018 SKU 5 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { 6 | name: name 7 | location: location 8 | tags: tags 9 | properties: any({ 10 | retentionInDays: 30 11 | features: { 12 | searchVersion: 1 13 | } 14 | sku: { 15 | name: 'PerGB2018' 16 | } 17 | }) 18 | } 19 | 20 | output id string = logAnalytics.id 21 | output name string = logAnalytics.name 22 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/monitor/monitoring.bicep: -------------------------------------------------------------------------------- 1 | param logAnalyticsName string 2 | param applicationInsightsName string 3 | param location 
string = resourceGroup().location 4 | param tags object = {} 5 | param disableLocalAuth bool = false 6 | // Log Analytics workspace that backs Application Insights 7 | module logAnalytics 'loganalytics.bicep' = { 8 | name: 'loganalytics' 9 | params: { 10 | name: logAnalyticsName 11 | location: location 12 | tags: tags 13 | } 14 | } 15 | // Application Insights component bound to the workspace created above 16 | module applicationInsights 'applicationinsights.bicep' = { 17 | name: 'applicationinsights' 18 | params: { 19 | name: applicationInsightsName 20 | location: location 21 | tags: tags 22 | logAnalyticsWorkspaceId: logAnalytics.outputs.id 23 | disableLocalAuth: disableLocalAuth 24 | } 25 | } 26 | // Re-export the outputs of both modules 27 | output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString 28 | output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey 29 | output applicationInsightsName string = applicationInsights.outputs.name 30 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id 31 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name 32 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/core/storage/storage-account.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param allowBlobPublicAccess bool = false 6 | @allowed(['Enabled', 'Disabled']) 7 | param publicNetworkAccess string = 'Enabled' 8 | param containers array = [] 9 | param kind string = 'StorageV2' 10 | param minimumTlsVersion string = 'TLS1_2' 11 | param sku object = { name: 'Standard_LRS' } 12 | // Storage account with shared-key access turned off (allowSharedKeyAccess: false) 13 | resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = { 14 | name: name 15 | location: location 16 | tags: tags 17 | kind: kind 18 | sku: sku 19 | properties: { 20 | minimumTlsVersion: minimumTlsVersion 21 | allowBlobPublicAccess: allowBlobPublicAccess 22 | publicNetworkAccess: publicNetworkAccess 23 | allowSharedKeyAccess: false 24 | 
networkAcls: { 25 | bypass: 'AzureServices' 26 | defaultAction: 'Allow' 27 | } 28 | } 29 | // Create the default blob service and one container per entry only when containers is non-empty; publicAccess falls back to 'None' 30 | resource blobServices 'blobServices' = if (!empty(containers)) { 31 | name: 'default' 32 | resource container 'containers' = [for container in containers: { 33 | name: container.name 34 | properties: { 35 | publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None' 36 | } 37 | }] 38 | } 39 | } 40 | 41 | output name string = storage.name 42 | output primaryEndpoints object = storage.properties.primaryEndpoints 43 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/main.bicep: -------------------------------------------------------------------------------- 1 | targetScope = 'subscription' 2 | 3 | @minLength(1) 4 | @maxLength(64) 5 | @description('Name of the environment which is used to generate a short unique hash used in all resources.') 6 | param environmentName string 7 | 8 | @minLength(1) 9 | @description('Primary location for all resources') 10 | @allowed(['australiaeast', 'eastasia', 'eastus', 'eastus2', 'northeurope', 'southcentralus', 'southeastasia', 'swedencentral', 'uksouth', 'westus2', 'eastus2euap']) 11 | @metadata({ 12 | azd: { 13 | type: 'location' 14 | } 15 | }) 16 | param location string 17 | // Optional name overrides; when left empty a name is generated from the abbreviation table and resourceToken 18 | param processorServiceName string = '' 19 | param processorUserAssignedIdentityName string = '' 20 | param applicationInsightsName string = '' 21 | param appServicePlanName string = '' 22 | param logAnalyticsName string = '' 23 | param resourceGroupName string = '' 24 | param storageAccountName string = '' 25 | param vNetName string = '' 26 | param disableLocalAuth bool = true 27 | 28 | var abbrs = loadJsonContent('./abbreviations.json') 29 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) 30 | var tags = { 'azd-env-name': environmentName } 31 | var functionAppName = !empty(processorServiceName) ? 
processorServiceName : '${abbrs.webSitesFunctions}processor-${resourceToken}' 32 | var deploymentStorageContainerName = 'app-package-${take(toLower(functionAppName), 32)}-${take(toLower(uniqueString(functionAppName, resourceToken)), 7)}' // lower-cased because blob container names must be lowercase and processorServiceName may contain capitals 33 | 34 | // Organize resources in a resource group 35 | resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { 36 | name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' 37 | location: location 38 | tags: tags 39 | } 40 | 41 | // User assigned managed identity to be used by the function app to reach storage and Application Insights 42 | module processorUserAssignedIdentity './core/identity/userAssignedIdentity.bicep' = { 43 | name: 'processorUserAssignedIdentity' 44 | scope: rg 45 | params: { 46 | location: location 47 | tags: tags 48 | identityName: !empty(processorUserAssignedIdentityName) ? processorUserAssignedIdentityName : '${abbrs.managedIdentityUserAssignedIdentities}processor-${resourceToken}' 49 | } 50 | } 51 | 52 | // The application backend is a function app 53 | module appServicePlan './core/host/appserviceplan.bicep' = { 54 | name: 'appserviceplan' 55 | scope: rg 56 | params: { 57 | name: !empty(appServicePlanName) ? 
appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' 58 | location: location 59 | tags: tags 60 | sku: { 61 | name: 'FC1' 62 | tier: 'FlexConsumption' 63 | } 64 | } 65 | } 66 | 67 | module processor './app/processor.bicep' = { 68 | name: 'processor' 69 | scope: rg 70 | params: { 71 | name: functionAppName 72 | location: location 73 | tags: tags 74 | applicationInsightsName: monitoring.outputs.applicationInsightsName 75 | appServicePlanId: appServicePlan.outputs.id 76 | runtimeName: 'dotnet-isolated' 77 | runtimeVersion: '8.0' 78 | storageAccountName: storage.outputs.name 79 | deploymentStorageContainerName: deploymentStorageContainerName 80 | identityId: processorUserAssignedIdentity.outputs.identityId 81 | identityClientId: processorUserAssignedIdentity.outputs.identityClientId 82 | appSettings: { 83 | } 84 | virtualNetworkSubnetId: serviceVirtualNetwork.outputs.appSubnetID 85 | } 86 | } 87 | 88 | // Backing storage for Azure functions processor 89 | module storage './core/storage/storage-account.bicep' = { 90 | name: 'storage' 91 | scope: rg 92 | params: { 93 | name: !empty(storageAccountName) ? 
storageAccountName : '${abbrs.storageStorageAccounts}${resourceToken}' 94 | location: location 95 | tags: tags 96 | containers: [{name: deploymentStorageContainerName}] 97 | publicNetworkAccess: 'Disabled' 98 | } 99 | } 100 | 101 | var storageRoleDefinitionId = 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' //Storage Blob Data Owner role 102 | 103 | // Allow access from processor to storage account using a managed identity 104 | module storageRoleAssignmentApi 'app/storage-Access.bicep' = { 105 | name: 'storageRoleAssignmentPRocessor' 106 | scope: rg 107 | params: { 108 | storageAccountName: storage.outputs.name 109 | roleDefinitionID: storageRoleDefinitionId 110 | principalID: processorUserAssignedIdentity.outputs.identityPrincipalId 111 | } 112 | } 113 | 114 | // Virtual Network & private endpoint to blob storage 115 | module serviceVirtualNetwork 'app/vnet.bicep' = { 116 | name: 'serviceVirtualNetwork' 117 | scope: rg 118 | params: { 119 | location: location 120 | tags: tags 121 | vNetName: !empty(vNetName) ? vNetName : '${abbrs.networkVirtualNetworks}${resourceToken}' 122 | } 123 | } 124 | 125 | module storagePrivateEndpoint 'app/storage-PrivateEndpoint.bicep' = { 126 | name: 'servicePrivateEndpoint' 127 | scope: rg 128 | params: { 129 | location: location 130 | tags: tags 131 | virtualNetworkName: !empty(vNetName) ? vNetName : '${abbrs.networkVirtualNetworks}${resourceToken}' 132 | subnetName: serviceVirtualNetwork.outputs.peSubnetName 133 | resourceName: storage.outputs.name 134 | } 135 | } 136 | 137 | // Monitor application with Azure Monitor 138 | module monitoring './core/monitor/monitoring.bicep' = { 139 | name: 'monitoring' 140 | scope: rg 141 | params: { 142 | location: location 143 | tags: tags 144 | logAnalyticsName: !empty(logAnalyticsName) ? logAnalyticsName : '${abbrs.operationalInsightsWorkspaces}${resourceToken}' 145 | applicationInsightsName: !empty(applicationInsightsName) ? 
applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' 146 | disableLocalAuth: disableLocalAuth 147 | } 148 | } 149 | 150 | var monitoringRoleDefinitionId = '3913510d-42f4-4e42-8a64-420c390055eb' // Monitoring Metrics Publisher role ID 151 | 152 | // Allow access from processor to application insights using a managed identity 153 | module appInsightsRoleAssignmentApi './core/monitor/appinsights-access.bicep' = { 154 | name: 'appInsightsRoleAssignmentPRocessor' 155 | scope: rg 156 | params: { 157 | appInsightsName: monitoring.outputs.applicationInsightsName 158 | roleDefinitionID: monitoringRoleDefinitionId 159 | principalID: processorUserAssignedIdentity.outputs.identityPrincipalId 160 | } 161 | } 162 | 163 | // App outputs 164 | output APPLICATIONINSIGHTS_CONNECTION_STRING string = monitoring.outputs.applicationInsightsConnectionString 165 | output AZURE_LOCATION string = location 166 | output AZURE_TENANT_ID string = tenant().tenantId 167 | output SERVICE_PROCESSOR_NAME string = processor.outputs.SERVICE_PROCESSOR_NAME 168 | output AZURE_FUNCTION_NAME string = processor.outputs.SERVICE_PROCESSOR_NAME 169 | -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/infra/main.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentName": { 6 | "value": "${AZURE_ENV_NAME}" 7 | }, 8 | "location": { 9 | "value": "${AZURE_LOCATION}" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /AzureCosmosDB/csharp/next-steps.md: -------------------------------------------------------------------------------- 1 | # Next Steps after `azd init` 2 | 3 | ## Table of Contents 4 | 5 | 1. [Next Steps](#next-steps) 6 | 2. [What was added](#what-was-added) 7 | 3. 
[Billing](#billing) 8 | 4. [Troubleshooting](#troubleshooting) 9 | 10 | ## Next Steps 11 | 12 | ### Provision infrastructure and deploy application code 13 | 14 | Run `azd up` to provision your infrastructure and deploy to Azure (or run `azd provision` then `azd deploy` to accomplish the tasks separately). Visit the service endpoints listed to see your application up-and-running! 15 | 16 | To troubleshoot any issues, see [troubleshooting](#troubleshooting). 17 | 18 | ### Configure environment variables for running services 19 | 20 | Configure environment variables for running services by updating `settings` in [main.parameters.json](./infra/main.parameters.json). 21 | 22 | ### Configure CI/CD pipeline 23 | 24 | 1. Create a workflow pipeline file locally. The following starters are available: 25 | - [Deploy with GitHub Actions](https://github.com/Azure-Samples/azd-starter-bicep/blob/main/.github/workflows/azure-dev.yml) 26 | - [Deploy with Azure Pipelines](https://github.com/Azure-Samples/azd-starter-bicep/blob/main/.azdo/pipelines/azure-dev.yml) 27 | 2. Run `azd pipeline config` to configure the deployment pipeline to connect securely to Azure. 28 | 29 | ## What was added 30 | 31 | ### Infrastructure configuration 32 | 33 | To describe the infrastructure and application, `azure.yaml` along with Infrastructure as Code files using Bicep were added with the following directory structure: 34 | 35 | ```yaml 36 | - azure.yaml # azd project configuration 37 | - infra/ # Infrastructure as Code (bicep) files 38 | - main.bicep # main deployment module 39 | - app/ # Application resource modules 40 | - shared/ # Shared resource modules 41 | - modules/ # Library modules 42 | ``` 43 | 44 | Each bicep file declares resources to be provisioned. The resources are provisioned when running `azd up` or `azd provision`. 
45 | 46 | - [app/DocumentVectorPipelineFunctions.bicep](./infra/app/DocumentVectorPipelineFunctions.bicep) - Azure Container Apps resources to host the 'DocumentVectorPipelineFunctions' service. 47 | - [shared/keyvault.bicep](./infra/shared/keyvault.bicep) - Azure Key Vault to store secrets. 48 | - [shared/monitoring.bicep](./infra/shared/monitoring.bicep) - Azure Log Analytics workspace and Application Insights to log and store instrumentation logs. 49 | - [shared/registry.bicep](./infra/shared/registry.bicep) - Azure Container Registry to store Docker images. 50 | 51 | More information about the [Bicep](https://aka.ms/bicep) language. 52 | 53 | ### Build from source (no Dockerfile) 54 | 55 | #### Build with Buildpacks using Oryx 56 | 57 | If your project does not contain a Dockerfile, we will use [Buildpacks](https://buildpacks.io/) using [Oryx](https://github.com/microsoft/Oryx/blob/main/doc/README.md) to create an image for the services in `azure.yaml` and get your containerized app onto Azure. 58 | 59 | To produce and run the Docker image locally: 60 | 61 | 1. Run `azd package` to build the image. 62 | 2. Copy the *Image Tag* shown. 63 | 3. Run `docker run -it <Image Tag>` to run the image locally. 64 | 65 | #### Exposed port 66 | 67 | Oryx will automatically set `PORT` to a default value of `80` (port `8080` for Java). Additionally, it will auto-configure supported web servers such as `gunicorn` and `ASP.NET Core` to listen to the target `PORT`. If your application already listens to the port specified by the `PORT` variable, the application will work out-of-the-box. Otherwise, you may need to perform one of the steps below: 68 | 69 | 1. Update your application code or configuration to listen to the port specified by the `PORT` variable. 70 | 1. (Alternatively) Search for `targetPort` in a .bicep file under the `infra/app` folder, and update the variable to match the port used by the application. 
71 | 72 | ## Billing 73 | 74 | Visit the *Cost Management + Billing* page in Azure Portal to track current spend. For more information about how you're billed, and how you can monitor the costs incurred in your Azure subscriptions, visit [billing overview](https://learn.microsoft.com/azure/developer/intro/azure-developer-billing). 75 | 76 | ## Troubleshooting 77 | 78 | Q: I visited the service endpoint listed, and I'm seeing a blank page, a generic welcome page, or an error page. 79 | 80 | A: Your service may have failed to start, or it may be missing some configuration settings. To investigate further: 81 | 82 | 1. Run `azd show`. Click on the link under "View in Azure Portal" to open the resource group in Azure Portal. 83 | 2. Navigate to the specific Container App service that is failing to deploy. 84 | 3. Click on the failing revision under "Revisions with Issues". 85 | 4. Review "Status details" for more information about the type of failure. 86 | 5. Observe the log outputs from Console log stream and System log stream to identify any errors. 87 | 6. If logs are written to disk, use *Console* in the navigation to connect to a shell within the running container. 88 | 89 | For more troubleshooting information, visit [Container Apps troubleshooting](https://learn.microsoft.com/azure/container-apps/troubleshooting). 90 | 91 | ### Additional information 92 | 93 | For additional information about setting up your `azd` project, visit our official [docs](https://learn.microsoft.com/azure/developer/azure-developer-cli/make-azd-compatible?pivots=azd-convert). 
94 | -------------------------------------------------------------------------------- /AzureSQL/csharp/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto-detect text files 2 | * text=auto working-tree-encoding=UTF-8 3 | 4 | # VS files 5 | *.*proj text eol=crlf 6 | *.sln text eol=crlf 7 | 8 | # Bash scripts 9 | *.sh text eol=lf 10 | *.cmd text eol=crlf 11 | -------------------------------------------------------------------------------- /AzureSQL/csharp/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | 
*.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # TODO: Comment the next line if you want to checkin your web deploy settings 145 | # but database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 
159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produces more ignoreable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. 
Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc 262 | /DocumentVectorPipelineFunctions/local.settings.json 263 | *local.settings.json 264 | 265 | /DocumentVectorPipelineFunctions/local.settings.json.dm 266 | /DocumentVectorPipelineFunctions/local.settings.json 267 | 268 | # Custom 269 | *.zip -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipeline.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.11.35017.193 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DocumentVectorPipelineFunctions", "DocumentVectorPipelineFunctions\DocumentVectorPipelineFunctions.csproj", 
"{8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{6F566C00-03BA-4C9D-9470-5AE124B2641A}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {8E3CEECC-1BCE-4D7C-B03F-6C07CF8BBB0F}.Release|Any CPU.Build.0 = Release|Any CPU 20 | EndGlobalSection 21 | GlobalSection(SolutionProperties) = preSolution 22 | HideSolutionNode = FALSE 23 | EndGlobalSection 24 | GlobalSection(ExtensibilityGlobals) = postSolution 25 | SolutionGuid = {CCA21164-A156-47CF-9371-C64CAEB32C7A} 26 | EndGlobalSection 27 | EndGlobal 28 | -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/BlobTriggerFunction.cs: --------------------------------------------------------------------------------
using System.ClientModel;
using System.Data;
using System.Globalization;
using System.Net;
using System.Text.Json;
using Azure;
using Azure.AI.FormRecognizer.DocumentAnalysis;
using Azure.Core;
using Azure.Identity;
using Azure.Storage.Blobs;
using Dapper;
using Microsoft.Azure.Functions.Worker;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using OpenAI.Embeddings;

namespace DocumentVectorPipelineFunctions;

/// <summary>
/// Blob-triggered function: reads a document with the "prebuilt-read" model, splits the
/// text into chunks, generates an embedding per chunk and inserts one row per chunk
/// into an Azure SQL table.
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments were missing from the reviewed copy of this file
/// and have been restored from usage (CreateLogger, GetValue, List, Task, IEnumerable);
/// confirm against the repository.
/// </remarks>
public class BlobTriggerFunction(
    IConfiguration configuration,
    DocumentAnalysisClient documentAnalysisClient,
    ILoggerFactory loggerFactory,
    EmbeddingClient embeddingClient)
{
    private const string AzureOpenAIModelDeploymentDimensionsName = "AzureOpenAIModelDimensions";
    private const string SqlConnectionString = "SqlConnectionString";

    private readonly ILogger _logger = loggerFactory.CreateLogger<BlobTriggerFunction>();

    // Target table; both parts can be overridden through environment variables.
    private readonly string TableSchemaName = Environment.GetEnvironmentVariable("DocumentTableSchema") ?? "dbo";
    private readonly string TableName = Environment.GetEnvironmentVariable("DocumentTableName") ?? "documents";

    // Not read anywhere in this class; kept so the field layout is unchanged.
    string? managedIdentityClientId = Environment.GetEnvironmentVariable("AzureManagedIdentityClientId");

    private static readonly int DefaultDimensions = 1536;

    private const int MaxRetryCount = 100;
    private const int RetryDelay = 10 * 1000; // 10 seconds

    private const int MaxBatchSize = 10;
    private const int MaxDegreeOfParallelism = 50;

    // Overwritten from configuration at the start of HandleBlobCreateEventAsync.
    private int embeddingDimensions = DefaultDimensions;

    /// <summary>
    /// Entry point for blobs under "documents/". Dispatches to the create handler when
    /// the blob exists and to the delete handler otherwise.
    /// </summary>
    [Function("BlobTriggerFunction")]
    public async Task Run([BlobTrigger("documents/{name}", Connection = "AzureBlobStorageAccConnectionString")] BlobClient blobClient)
    {
        this._logger.LogInformation("Starting processing of blob name: '{name}'", blobClient.Name);

        if (await blobClient.ExistsAsync())
        {
            await this.HandleBlobCreateEventAsync(blobClient);
        }
        else
        {
            await this.HandleBlobDeleteEventAsync(blobClient);
        }
        this._logger.LogInformation("Finished processing of blob name: '{name}'", blobClient.Name);
    }

    /// <summary>
    /// Downloads the blob, extracts its text, chunks it, embeds the chunks in batches of
    /// <see cref="MaxBatchSize"/> and stores every chunk with its embedding in Azure SQL.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The SQL connection string is not configured, or the embedding service returned a
    /// different number of embeddings than chunks sent.
    /// </exception>
    private async Task HandleBlobCreateEventAsync(BlobClient blobClient)
    {
        embeddingDimensions = configuration.GetValue<int>(AzureOpenAIModelDeploymentDimensionsName, DefaultDimensions);
        // Fail fast with a clear message instead of failing later when the connection opens.
        var connectionString = configuration.GetValue<string>(SqlConnectionString)
            ?? throw new InvalidOperationException($"Configure {SqlConnectionString}");
        _logger.LogInformation("Using OpenAI model dimensions: '{embeddingDimensions}'.", embeddingDimensions);

        _logger.LogInformation("Analyzing document using DocumentAnalyzerService from blobUri: '{blobUri}' using layout: {layout}", blobClient.Name, "prebuilt-read");

        using MemoryStream memoryStream = new MemoryStream();
        await blobClient.DownloadToAsync(memoryStream);
        memoryStream.Seek(0, SeekOrigin.Begin);

        var operation = await documentAnalysisClient.AnalyzeDocumentAsync(
            WaitUntil.Completed,
            "prebuilt-read",
            memoryStream);

        var result = operation.Value;
        _logger.LogInformation("Extracted content from '{name}', # pages {pageCount}", blobClient.Name, result.Pages.Count);

        // FixedSizeChunking is a lazy iterator, so it must be enumerated exactly once:
        // calling Count()/ElementAt(i) in a loop would re-run the chunker for every index.
        var listOfBatches = TextChunker.FixedSizeChunking(result).Chunk(MaxBatchSize).ToList();
        int totalChunksCount = listOfBatches.Sum(batch => batch.Length);

        _logger.LogInformation("Processing list of batches in parallel, total batches: {listSize}, chunks count: {chunksCount}", listOfBatches.Count, totalChunksCount);

        await EnsureDocumentTableExistsAsync(connectionString);

        string sanitizedName = SantizeDatabaseObjectName(TableSchemaName) + "." + SantizeDatabaseObjectName(TableName);
        string insertQuery = $@"INSERT INTO {sanitizedName} (ChunkId, DocumentUrl, Embedding, ChunkText, PageNumber) VALUES (@ChunkId, @DocumentUrl, @Embedding, @ChunkText, @PageNumber);";

        await Parallel.ForEachAsync(listOfBatches, new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, async (batch, cancellationToken) =>
        {
            _logger.LogInformation("Processing batch of size: {batchSize}", batch.Length);

            if (batch.Length == 0)
            {
                return;
            }

            var embeddings = await GenerateEmbeddingsWithRetryAsync(batch);
            _logger.LogInformation("Embeddings generated: {0}", embeddings.Count);

            if (embeddings.Count == 0)
            {
                return;
            }

            if (embeddings.Count != batch.Length)
            {
                // Rows are paired with embeddings by index; refuse to store mismatched pairs.
                throw new InvalidOperationException($"Expected {batch.Length} embeddings but received {embeddings.Count}.");
            }

            // Save into Azure SQL
            _logger.LogInformation("Begin Saving data in Azure SQL");

            // One connection per batch, opened asynchronously, instead of one blocking
            // connection per chunk.
            await using var connection = new SqlConnection(connectionString);
            await connection.OpenAsync(cancellationToken);

            for (int index = 0; index < batch.Length; index++)
            {
                var doc = new Document()
                {
                    ChunkId = batch[index].ChunkNumber,
                    DocumentUrl = blobClient.Uri.AbsoluteUri,
                    Embedding = JsonSerializer.Serialize(embeddings[index].Vector),
                    ChunkText = batch[index].Text,
                    PageNumber = batch[index].PageNumberIfKnown,
                };
                await connection.ExecuteAsync(insertQuery, doc);
            }

            _logger.LogInformation("End Saving data in Azure SQL");
        });

        _logger.LogInformation("Finished processing blob {name}, total chunks processed {count}.", blobClient.Name, totalChunksCount);
    }

    /// <summary>
    /// Requests embeddings for a batch of chunks, retrying on HTTP 429 and 401 responses.
    /// Makes at most <see cref="MaxRetryCount"/> attempts, waiting <see cref="RetryDelay"/>
    /// milliseconds between attempts.
    /// </summary>
    /// <exception cref="Exception">
    /// All attempts failed with a retryable status, or the service returned any other
    /// error status. The original <see cref="ClientResultException"/> is the inner exception.
    /// </exception>
    // NOTE(review): the return type argument is assumed to be EmbeddingCollection because
    // callers use .Count, the indexer and .Vector on the result; confirm against the SDK version.
    private async Task<EmbeddingCollection> GenerateEmbeddingsWithRetryAsync(IEnumerable<TextChunk> batchChunkTexts)
    {
        EmbeddingGenerationOptions embeddingGenerationOptions = new()
        {
            Dimensions = embeddingDimensions
        };

        // Built once; the inputs do not change between attempts.
        var inputs = batchChunkTexts.Select(p => p.Text).ToList();

        for (int attempt = 1; ; attempt++)
        {
            try
            {
                return await embeddingClient.GenerateEmbeddingsAsync(inputs, embeddingGenerationOptions);
            }
            catch (ClientResultException ex) when (ex.Status is ((int)HttpStatusCode.TooManyRequests) or ((int)HttpStatusCode.Unauthorized))
            {
                if (attempt >= MaxRetryCount)
                {
                    throw new Exception($"Max retry attempts reached generating embeddings with exception: {ex}.", ex);
                }

                await Task.Delay(RetryDelay);
            }
            catch (ClientResultException ex)
            {
                throw new Exception($"Failed to generate embeddings with error: {ex}.", ex);
            }
        }
    }

    /// <summary>Placeholder for delete handling; currently only logs the blob name.</summary>
    private async Task HandleBlobDeleteEventAsync(BlobClient blobClient)
    {
        // TODO - Implement me :)
        _logger.LogInformation("Handling delete event for blob name {name}.", blobClient.Name);

        await Task.Delay(1);
    }

    /// <summary>
    /// Returns <paramref name="name"/> as a bracket-quoted SQL identifier. A value that is
    /// already wrapped in [ ] is returned trimmed but otherwise unchanged.
    /// </summary>
    private string SantizeDatabaseObjectName(string name)
    {
        string santized = name.Trim();
        if (santized.StartsWith('[') && santized.EndsWith(']'))
            return santized;
        else
            // Double any closing bracket so the value cannot terminate the identifier early.
            return "[" + santized.Replace("]", "]]") + "]";
    }

    /// <summary>
    /// Creates the document table when INFORMATION_SCHEMA.TABLES has no entry for it. The
    /// embedding column is sized to the configured embedding dimensions.
    /// </summary>
    private async Task EnsureDocumentTableExistsAsync(string connectionString)
    {
        _logger.LogInformation("Creating table if it does not exist yet...");

        string SanitizedName = SantizeDatabaseObjectName(TableSchemaName) + "." + SantizeDatabaseObjectName(TableName);
        _logger.LogInformation("Document Table: {0}", SanitizedName);

        // The name is also embedded in a T-SQL string literal for PARSENAME, so single
        // quotes must be doubled there.
        string nameLiteral = SanitizedName.Replace("'", "''");

        string createDocumentTableScript = $@"
            IF NOT EXISTS (SELECT 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = PARSENAME('{nameLiteral}', 1) AND TABLE_SCHEMA = PARSENAME('{nameLiteral}', 2))
            BEGIN
                CREATE TABLE {SanitizedName} (
                    [Id] INT IDENTITY(1,1) PRIMARY KEY NOT NULL,
                    [ChunkId] INT NULL,
                    [DocumentUrl] VARCHAR(1000) NULL,
                    [Embedding] VECTOR({embeddingDimensions}) NULL,
                    [ChunkText] VARCHAR(MAX) NULL,
                    [PageNumber] INT NULL
                );
            END";

        await using (var connection = new SqlConnection(connectionString))
        {
            //connection.AccessToken = token.Token;
            await connection.ExecuteAsync(createDocumentTableScript);
        }
    }
}

-------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/Document.cs: -------------------------------------------------------------------------------- 1 |  2 | namespace DocumentVectorPipelineFunctions 3 | { 4 | public class Document 5 | { 6 | public int Id { get; set; } 7 | public int? ChunkId { get; set; } 8 | public required string DocumentUrl { get; set; } 9 | public required string Embedding { get; set; } 10 | public required string ChunkText { get; set; } 11 | public int? 
PageNumber { get; set; } 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/DocumentVectorPipelineFunctions.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | net8.0 4 | v4 5 | Exe 6 | enable 7 | enable 8 | 4790278e-5c51-4fec-a397-f7eaa13e76f5 9 | False 10 | 0.1.0.0 11 | preview.1 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | PreserveNewest 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/Program.cs: -------------------------------------------------------------------------------- 1 | using System.ClientModel.Primitives; 2 | using System.Text.Json; 3 | using Azure.AI.FormRecognizer.DocumentAnalysis; 4 | using Azure.AI.OpenAI; 5 | using Azure.Core; 6 | using Azure.Identity; 7 | using DocumentVectorPipelineFunctions; 8 | using Microsoft.Extensions.Configuration; 9 | using Microsoft.Extensions.DependencyInjection; 10 | using Microsoft.Extensions.Hosting; 11 | using OpenAI.Embeddings; 12 | using Microsoft.Extensions.Logging; 13 | 14 | var _logger = LoggerFactory.Create(builder => builder.AddConsole()).CreateLogger("Program"); 15 | 16 | const string AzureDocumentIntelligenceEndpointConfigName = "AzureDocumentIntelligenceConnectionString"; 17 | 18 | const string AzureOpenAIConnectionString = "AzureOpenAIConnectionString"; 19 | const string AzureOpenAIModelDeploymentConfigName = "AzureOpenAIModelDeployment"; 20 | const string AzureDocumentIntelligenceKey = "AzureDocumentIntelligenceKey"; 21 | const string AzureOpenAIKey = "AzureOpenAIKey"; 22 | 23 | string? managedIdentityClientId = Environment.GetEnvironmentVariable("AzureManagedIdentityClientId"); 24 | bool local = Convert.ToBoolean(Environment.GetEnvironmentVariable("RunningLocally") ?? 
"false"); 25 | 26 | _logger.LogInformation($"Running locally: {local}"); 27 | 28 | TokenCredential credential = local 29 | ? new DefaultAzureCredential() 30 | : new ManagedIdentityCredential(clientId: managedIdentityClientId); 31 | 32 | var hostBuilder = new HostBuilder() 33 | .ConfigureFunctionsWorkerDefaults() 34 | .ConfigureAppConfiguration(config => 35 | { 36 | config.AddUserSecrets(optional: true, reloadOnChange: false); 37 | }); 38 | 39 | hostBuilder.ConfigureServices(sc => 40 | { 41 | sc.AddSingleton(sp => 42 | { 43 | var config = sp.GetRequiredService(); 44 | 45 | Azure.AzureKeyCredential? keyCredential = null; 46 | var docaiKey = config[AzureDocumentIntelligenceKey] ?? throw new Exception($"Configure {AzureDocumentIntelligenceKey}"); 47 | if (!string.IsNullOrEmpty(docaiKey)) 48 | { 49 | _logger.LogInformation($"Using Azure Key Credential for Azure Document Intelligence service"); 50 | keyCredential = new Azure.AzureKeyCredential(docaiKey); 51 | } 52 | 53 | var documentIntelligenceEndpoint = config[AzureDocumentIntelligenceEndpointConfigName] ?? throw new Exception($"Configure {AzureDocumentIntelligenceEndpointConfigName}"); 54 | var documentAnalysisClient = keyCredential == null ? 55 | new DocumentAnalysisClient(new Uri(documentIntelligenceEndpoint), credential) : 56 | new DocumentAnalysisClient(new Uri(documentIntelligenceEndpoint), keyCredential); 57 | return documentAnalysisClient; 58 | }); 59 | sc.AddSingleton(sp => 60 | { 61 | var config = sp.GetRequiredService(); 62 | 63 | Azure.AzureKeyCredential? keyCredential = null; 64 | var azureAIKey = config[AzureOpenAIKey] ?? throw new Exception($"Configure {AzureOpenAIKey}"); 65 | if (!string.IsNullOrEmpty(azureAIKey)) 66 | { 67 | _logger.LogInformation($"Using Azure Key Credential for Azure Open AI service"); 68 | keyCredential = new Azure.AzureKeyCredential(azureAIKey); 69 | } 70 | var openAIEndpoint = config[AzureOpenAIConnectionString] ?? 
throw new Exception($"Configure {AzureOpenAIConnectionString}"); 71 | 72 | // TODO: Implement a custom retry policy that takes the retry-after header into account. 73 | var options = new AzureOpenAIClientOptions() 74 | { 75 | ApplicationId = "DocumentIngestion", 76 | RetryPolicy = new ClientRetryPolicy(maxRetries: 10), 77 | }; 78 | 79 | var azureOpenAIClient = keyCredential == null ? 80 | new AzureOpenAIClient(new Uri(openAIEndpoint), credential, options) : 81 | new AzureOpenAIClient(new Uri(openAIEndpoint), keyCredential, options); 82 | return azureOpenAIClient.GetEmbeddingClient(config[AzureOpenAIModelDeploymentConfigName] ?? throw new Exception($"Configure {AzureOpenAIModelDeploymentConfigName}")); 83 | }); 84 | }); 85 | 86 | var host = hostBuilder.Build(); 87 | host.Run(); 88 | -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/TextChunker.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using Azure.AI.FormRecognizer.DocumentAnalysis; 3 | 4 | namespace DocumentVectorPipelineFunctions; 5 | 6 | internal record struct TextChunk( 7 | string Text, 8 | int PageNumberIfKnown, 9 | int ChunkNumber); 10 | 11 | internal class TextChunker 12 | { 13 | private const int MaxChunkSize = 2048; 14 | 15 | public static IEnumerable FixedSizeChunking(AnalyzeResult? 
result, int chunkSize = MaxChunkSize) 16 | { 17 | if (result == null) 18 | { 19 | yield break; 20 | } 21 | 22 | var sb = new StringBuilder(chunkSize); 23 | var pageIndex = 0; 24 | var chunkIndex = 0; 25 | foreach (var page in result.Pages) 26 | { 27 | foreach (var word in page.Words) 28 | { 29 | sb.Append(word.Content).Append(' '); 30 | if (sb.Length > chunkSize) 31 | { 32 | sb.Length -= 1; 33 | string chunk = sb.ToString(); 34 | sb.Clear(); 35 | 36 | yield return new TextChunk(chunk, pageIndex, chunkIndex); 37 | chunkIndex++; 38 | } 39 | } 40 | pageIndex++; 41 | } 42 | 43 | if (sb.Length > 1) 44 | { 45 | sb.Length -= 1; 46 | string chunk = sb.ToString(); 47 | yield return new TextChunk(chunk, pageIndex - 1, chunkIndex); // pageIndex has already advanced past the last page; report the page the text ended on, like the in-loop chunks do 48 | } 49 | } 50 | } -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /AzureSQL/csharp/DocumentVectorPipelineFunctions/local.settings.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "AzureWebJobsStorage": "UseDevelopmentStorage=true", 5 | "AzureWebJobsAzureBlobStorageAccConnectionString__blobServiceUri": "https://.blob.core.windows.net", 6 | "AzureWebJobsAzureBlobStorageAccConnectionString__queueServiceUri": "https://.queue.core.windows.net", 7 | "AzureDocumentIntelligenceConnectionString": "https://.cognitiveservices.azure.com/", 8 | "AzureDocumentIntelligenceKey": "", 9 | "AzureOpenAIKey": "", 10 | "AzureOpenAIConnectionString": "https://.openai.azure.com/", 11 | "RunningLocally": true, 12 | "AzureOpenAIModelDeployment": 
"text-embedding-3-small", 13 | "FUNCTIONS_WORKER_RUNTIME": "dotnet-isolated", 14 | "SqlConnectionString": "Server=tcp:.database.windows.net,1433;Initial Catalog=;Connection Timeout=30;Authentication=Active Directory Default", 15 | "DocumentTableSchema": "dbo", 16 | "DocumentTableName": "[document]" 17 | } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /AzureSQL/csharp/README.md: -------------------------------------------------------------------------------- 1 | # Azure SQL Database, Document to Vector ingestion pipeline Proof-of-concept 2 | 3 | ## Background 4 | The purpose of this project is to demonstrate a proof of concept pipeline for ingesting content stored in document form (pdf, docx, etc) into Azure SQL database, using the new vector support, for information retrieval. 5 | 6 | **Let us know what you think!** Create a GitHub issue for any bugs/feature requests. 7 | 8 | ### Pipeline stages 9 | The basic stages of the pipeline include: 10 | 11 | 1. User uploads a file to Azure blob storage. 12 | 1. Blob Created Event triggers Azure Function. 13 | 1. Azure Function - calls document intelligence service to extract text. 14 | 1. Azure Document Intelligence Service - converts the document format into raw text. 15 | 1. Azure Function Chunking process - breaks text into reasonable size chunks for LLMs to process. 16 | 1. Azure Functions Generate embedding - using an LLM to produce a vector embedding of the semantics of each text chunk. 17 | 1. Chunk & Embedding storage - storing each text chunk along with its embedding in Azure SQL Database for semantic and Full-text searches. 18 | 19 | ![pipeline](images/azuresql_pipeline.png "Pipeline") 20 | 21 | ### Technology choices 22 | Currently this proof of concept uses: 23 | * Azure Blob storage for upload of documents. 24 | * Azure Functions to process the pipeline. 25 | * Azure Application Insights for logging. 26 | * Azure Managed Identity to connect resources. 
27 | * Azure AI Document Intelligence for text extraction using the `prebuilt-layout` model. 28 | * Fixed size, non-overlapping text chunking. 29 | * The `text-embedding-3-large` embedding model from Azure OpenAI for embedding. 30 | * Azure SQL Server to store and retrieve vector embeddings. 31 | 32 | ## Setup 33 | 34 | ### Prerequisites 35 | 36 | * An Azure subscription with access to Azure OpenAI. 37 | * The Azure CLI installed. 38 | * Azure Functions Core Tools installed. 39 | * A PowerShell prompt. 40 | 41 | ### Steps 42 | 43 | 1. Set subscription id and the resource group name that you want to deploy the resources to: 44 | 45 | ```powershell 46 | $sub = "" 47 | $rg = "" 48 | az account set --subscription $sub 49 | ``` 50 | 51 | 1. Create a Resource Group in your Azure subscription in the region where you want your resources deployed. Ensure it's a region that supports all of the above Azure Resource types. Examples include `West US`, `East US`, and `East US2`. 52 | 53 | ```powershell 54 | $l = 'eastus' 55 | az group create -l $l -n $rg 56 | ``` 57 | 58 | 1. Get your Principal ID and set it as a variable 59 | 60 | ```powershell 61 | $adId = az ad signed-in-user show --query id -o tsv 62 | ``` 63 | 64 | 1. Set the `baseName` variable to provide a base name for the created resources, and deploy the initial set of resources 65 | 66 | ```powershell 67 | $baseName = 'docai' 68 | 69 | az deployment group create --name "${baseName}deploy" --resource-group $rg --template-file '.\deployment\main.bicep' -p .\deployment\main.bicepparam --parameters userPrincipalId=$adId baseName=$baseName 70 | ``` 71 | 72 | This step will likely take several minutes to complete - it will create all of the required Azure resources. 73 | 74 | NOTE: Some resource names must be globally unique. You can set a different base name for the created resources by altering the `baseName` variable value. 75 | 76 | 1. Enable access to the managed identity in your Azure SQL database 77 | 78 | 1. 
Navigate to the Azure SQL database account created in the Azure Portal. 79 | 1. Click on the `Query editor(preview)` blade. 80 | 1. Execute the following query to allow the user managed identity that has been created (`${baseName}useridentity`) to access Azure SQL. 81 | ``` SQL 82 | CREATE USER [docaiuseridentity] FROM EXTERNAL PROVIDER; 83 | ALTER ROLE db_owner ADD MEMBER [docaiuseridentity]; 84 | ``` 85 | 1. See the image below: 86 | 87 | ![screenshot](images/azuresql_managedidentity.png "Enable vector search") 88 | 89 | 1. Build the Azure Function 90 | 91 | ```powershell 92 | dotnet publish -c Release 93 | ``` 94 | 95 | 1. Compress the Azure Function code 96 | 97 | ```powershell 98 | echo "---> Compressing Function Code" 99 | Compress-Archive .\DocumentVectorPipelineFunctions\bin\Release\net8.0\* publish.zip 100 | ``` 101 | 102 | 1. Deploy the functions app code 103 | 104 | ```powershell 105 | echo "---> Uploading Function Code" 106 | az functionapp deployment source config-zip -g $rg -n "${baseName}funcapp" --src .\publish.zip 107 | ``` 108 | 109 | 1. Restart the Azure Function 110 | 111 | ```powershell 112 | az functionapp restart -g $rg -n "${baseName}funcapp" 113 | ``` 114 | 115 | 1. Monitor traces 116 | ```powershell 117 | # Monitor traces 118 | echo "---> Monitoring Function Code" 119 | func azure functionapp logstream "${baseName}funcapp" 120 | ``` 121 | Note - `func` above comes from the Azure Functions tools. You can also view this log stream in the Azure Portal by navigating to the Azure Functions app created above, and clicking on the `Monitoring\Log Stream` blade. 122 | 123 | 1. Upload documents to Azure blob storage account 124 | 1. Navigate to the storage account created above (`${baseName}blobacc`, i.e. `docaiblobacc` with the base name used above). 125 | 1. Click on the `Storage Browser` blade 126 | 1. Click on `Blob containers` and then the `documents` folder. 127 | 1. Click the `Upload` button in the toolbar, and then drag or browse to a document. 128 | 1. 
Check the event stream, and your Azure SQL database account. The document should be processed and ingested into a `document` table. 129 | 130 | 1. Query data 131 | 132 | Build an intelligent, context-aware application using the searchable data in your Azure SQL database account. See the [documentation](https://github.com/Azure-Samples/azure-sql-db-vector-search) for details. 133 | 134 | ## Contributing 135 | 136 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 137 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 138 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 139 | 140 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 141 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 142 | provided by the bot. You will only need to do this once across all repos using our CLA. 143 | 144 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 145 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 146 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 147 | 148 | ## Trademarks 149 | 150 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 151 | trademarks or logos is subject to and must follow 152 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 153 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 154 | Any use of third-party trademarks or logos are subject to those third-party's policies. 
155 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/azuresql.bicep: -------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | 3 | // PrincipalId to be the SQL Admin 4 | @description('PrincipalId to be the SQL Admin') 5 | param userPrincipalId string 6 | 7 | param managedIdentityName string 8 | param azuresqldbName string 9 | param tags object 10 | 11 | param azuresqlServerName string 12 | 13 | // Get existing managed identity resource 14 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 15 | name: managedIdentityName 16 | } 17 | 18 | 19 | resource azuresqlserver 'Microsoft.Sql/servers@2023-08-01-preview' = { 20 | name: azuresqlServerName 21 | location: location 22 | tags: tags 23 | identity: { 24 | type: 'UserAssigned' 25 | userAssignedIdentities: { 26 | '${managedIdentity.id}': {} 27 | } 28 | } 29 | properties: { 30 | minimalTlsVersion: '1.2' 31 | publicNetworkAccess: 'Enabled' 32 | primaryUserAssignedIdentityId: managedIdentity.id 33 | administrators: { 34 | administratorType: 'ActiveDirectory' 35 | principalType: 'User' 36 | login: '${azuresqlServerName}-admin' 37 | sid: userPrincipalId 38 | tenantId: subscription().tenantId 39 | azureADOnlyAuthentication: true 40 | } 41 | restrictOutboundNetworkAccess: 'Disabled' 42 | } 43 | } 44 | resource azuresqldatabase 'Microsoft.Sql/servers/databases@2022-05-01-preview' = { 45 | parent: azuresqlserver 46 | name: azuresqldbName 47 | location: location 48 | sku: { 49 | name: 'GP_S_Gen5' 50 | tier: 'GeneralPurpose' 51 | family: 'Gen5' 52 | capacity: 1 53 | } 54 | properties: { 55 | maxSizeBytes: 268435456000 // 250 GB 56 | zoneRedundant: false 57 | readScale: 'Disabled' 58 | requestedBackupStorageRedundancy: 'Local' 59 | minCapacity: json('0.5') 60 | autoPauseDelay: 60 // Serverless 61 | collation: 
'SQL_Latin1_General_CP1_CI_AS' 62 | catalogCollation: 'SQL_Latin1_General_CP1_CI_AS' 63 | createMode: 'Default' 64 | } 65 | } 66 | 67 | resource firewallRule 'Microsoft.Sql/servers/firewallRules@2022-05-01-preview' = { 68 | parent: azuresqlserver 69 | name: '${azuresqlServerName}-AllowAllWindowsAzureIps' 70 | properties: { 71 | startIpAddress: '0.0.0.0' 72 | endIpAddress: '0.0.0.0' 73 | } 74 | } 75 | 76 | output sqlServerName string = azuresqlServerName 77 | output sqlDatabaseName string = azuresqldbName 78 | 79 | //output sqlServerFullyQualifiedDomainName string = azuresqldbName.properties.fullyQualifiedDomainName 80 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/azuresql.bicepparam: -------------------------------------------------------------------------------- 1 | using './azuresql.bicep' 2 | 3 | param azuresqldbName = 'dociingdb' 4 | param managedIdentityName = 'docinguseridentity' 5 | param tags = {} 6 | param azuresqlServerName = 'dociingdb-server' 7 | param userPrincipalId = '' 8 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/documentintelligence.bicep: -------------------------------------------------------------------------------- 1 | @description('Location to deploy the resource. 
Defaults to the location of the resource group.') 2 | param location string = resourceGroup().location 3 | 4 | // Input parameters 5 | param name string 6 | param tags object 7 | param sku object 8 | param publicNetworkAccess string 9 | param disableLocalAuth bool 10 | 11 | // Assign user identity permissions to storage account 12 | param managedIdentityName string 13 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 14 | name: managedIdentityName 15 | } 16 | 17 | // Create document intelligence resource 18 | resource documentIntelligence 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' = { 19 | name: name 20 | location: location 21 | tags: tags 22 | kind: 'FormRecognizer' 23 | properties: { 24 | customSubDomainName: name 25 | disableLocalAuth: disableLocalAuth 26 | publicNetworkAccess: publicNetworkAccess 27 | networkAcls: { 28 | defaultAction: 'Allow' 29 | ipRules: [] 30 | virtualNetworkRules: [] 31 | } 32 | } 33 | identity: { 34 | type: 'UserAssigned' 35 | userAssignedIdentities: { 36 | '${managedIdentity.id}': {} 37 | } 38 | } 39 | sku: sku 40 | } 41 | 42 | param storage_account_id_roles array = ['a97b65f3-24c7-4388-baec-2e87135dc908'] //Cognitive service user 43 | resource roleAssignmentDocumentIntelligence 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 44 | for id_role in storage_account_id_roles: { 45 | name: guid(resourceGroup().id, '${documentIntelligence.name}-storagerole', id_role) 46 | scope: documentIntelligence 47 | properties: { 48 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 49 | principalId: managedIdentity.properties.principalId 50 | } 51 | } 52 | ] 53 | 54 | // Output parameters 55 | @description('Name for the deployed Document Intelligence resource.') 56 | output DocumentIntelligenceName string = documentIntelligence.name 57 | 58 | @description('Endpoint for the deployed Document Intelligence resource.') 59 | output 
DocumentIntelligenceEndpoint string = documentIntelligence.properties.endpoint 60 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/documentintelligence.bicepparam: -------------------------------------------------------------------------------- 1 | using './documentintelligence.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param name = 'docingdocintl' 5 | 6 | param tags = {} 7 | param sku = { 8 | name: 'S0' 9 | } 10 | param publicNetworkAccess = 'Enabled' 11 | param disableLocalAuth = false 12 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/functionapp.bicep: -------------------------------------------------------------------------------- 1 | var location = resourceGroup().location 2 | 3 | // Input params 4 | param funcAppStorageAccountName string 5 | param funcAppStorageSkuName string 6 | param appInsightsName string 7 | param appServicePlanName string 8 | param functionAppName string 9 | param logAnalyticsName string 10 | param managedIdentityName string 11 | param azuresqldbName string 12 | param azuresqlserverName string 13 | param diAccountName string 14 | param openAIAccountName string 15 | param storageAccountName string 16 | param modelDeployment string 17 | param modelDimensions string 18 | 19 | // Get existing managed identity resource 20 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 21 | name: managedIdentityName 22 | } 23 | 24 | resource azuresqlserver 'Microsoft.Sql/servers@2023-08-01-preview' existing= { 25 | name: azuresqlserverName 26 | } 27 | 28 | resource azuresqldatabase 'Microsoft.Sql/servers/databases@2022-05-01-preview' existing= { 29 | name: azuresqldbName 30 | } 31 | 32 | resource documentIntelligence 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' existing = { 33 | name: diAccountName 34 | } 35 | 36 | resource openAi 
'Microsoft.CognitiveServices/accounts@2024-04-01-preview' existing = { 37 | name: openAIAccountName 38 | } 39 | 40 | resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' existing = { 41 | name: storageAccountName 42 | } 43 | 44 | var sqlconnectionstringvalue = 'Server=tcp:${azuresqlserver.properties.fullyQualifiedDomainName},1433;Initial Catalog=${azuresqldatabase.name};Encrypt=True;TrustServerCertificate=False;Connection Timeout=30;Authentication=Active Directory Managed Identity;User Id=${managedIdentity.properties.clientId};' 45 | // Create webapps storage account to hold webapps related resources 46 | resource func_app_storage_account 'Microsoft.Storage/storageAccounts@2023-05-01' = { 47 | name: funcAppStorageAccountName 48 | location: location 49 | sku: { 50 | name: funcAppStorageSkuName 51 | } 52 | kind: 'StorageV2' 53 | properties: { 54 | accessTier: 'Hot' 55 | allowSharedKeyAccess: false // Ensure shared key access is disabled 56 | } 57 | 58 | } 59 | // Assign storage account roles to func_app_storage_account 60 | // Storage Account Contributor, Storage Blob Data Owner , Storage Queue Data Contributor 61 | param storage_account_id_roles array = [ 62 | '17d1049b-9a84-46fb-8f53-869881c3d3ab', 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b', '974c5e8b-45b9-4653-ba55-5f855dd0fb88' 63 | ] 64 | 65 | resource roleAssignmentFuncStorageAccount 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 66 | for id_role in storage_account_id_roles: { 67 | name: guid(resourceGroup().id, '${func_app_storage_account.name}-webjobsrole', id_role) 68 | scope: func_app_storage_account 69 | properties: { 70 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 71 | principalId: managedIdentity.properties.principalId 72 | principalType: 'ServicePrincipal' 73 | } 74 | } 75 | ] 76 | 77 | // Create a new Log Analytics workspace to back the Azure Application Insights instance 78 | resource logAnalytics 
'Microsoft.OperationalInsights/workspaces@2023-09-01' = { 79 | name: logAnalyticsName 80 | location: location 81 | properties: { 82 | sku: { 83 | name: 'PerGB2018' 84 | } 85 | retentionInDays: 30 86 | features: { 87 | enableLogAccessUsingOnlyResourcePermissions: true 88 | } 89 | workspaceCapping: { 90 | dailyQuotaGb: 1 91 | } 92 | publicNetworkAccessForIngestion: 'Enabled' 93 | publicNetworkAccessForQuery: 'Enabled' 94 | } 95 | } 96 | 97 | // Application Insights instance 98 | resource appInsights 'Microsoft.Insights/components@2020-02-02' = { 99 | name: appInsightsName 100 | location: location 101 | kind: 'web' 102 | properties: { 103 | Application_Type: 'web' 104 | publicNetworkAccessForIngestion: 'Enabled' 105 | publicNetworkAccessForQuery: 'Enabled' 106 | WorkspaceResourceId: logAnalytics.id 107 | } 108 | } 109 | 110 | // Web server farm 111 | resource appservice_plan 'Microsoft.Web/serverfarms@2023-12-01' = { 112 | name: appServicePlanName 113 | location: location 114 | kind: 'functionapp' 115 | sku: { 116 | name: 'Y1' 117 | } 118 | properties: {} 119 | } 120 | 121 | // Deploy the Azure Function app with application 122 | resource funcApp 'Microsoft.Web/sites@2023-12-01' = { 123 | name: functionAppName 124 | location: location 125 | kind: 'functionapp' 126 | identity: { 127 | type: 'UserAssigned' 128 | userAssignedIdentities: { 129 | '${managedIdentity.id}': {} 130 | } 131 | } 132 | properties: { 133 | httpsOnly: true 134 | serverFarmId: appservice_plan.id 135 | keyVaultReferenceIdentity: managedIdentity.id 136 | enabled: true 137 | siteConfig: { 138 | appSettings: [ 139 | { 140 | name: 'AzureWebJobsStorage__accountName' 141 | value: funcAppStorageAccountName 142 | } 143 | { 144 | name: 'AzureWebJobsStorage__clientId' 145 | value: managedIdentity.properties.clientId 146 | } 147 | { 148 | name: 'AzureWebJobsStorage__blobServiceUri' 149 | value: func_app_storage_account.properties.primaryEndpoints.blob 150 | } 151 | { 152 | name: 
'AzureWebJobsStorage__queueServiceUri' 153 | value: func_app_storage_account.properties.primaryEndpoints.queue 154 | } 155 | { 156 | name: 'AzureBlobStorageAccConnectionString___clientId' 157 | value: managedIdentity.properties.clientId 158 | } 159 | { 160 | name: 'AzureBlobStorageAccConnectionString__blobServiceUri' 161 | value: storageAccount.properties.primaryEndpoints.blob 162 | } 163 | { 164 | name: 'AzureBlobStorageAccConnectionString__queueServiceUri' 165 | value: storageAccount.properties.primaryEndpoints.queue 166 | } 167 | { 168 | name: 'AzureBlobStorageAccConnectionString__credential' 169 | value: 'managedIdentity' 170 | } 171 | { 172 | name: 'AzureBlobStorageAccConnectionString__managedIdentityResourceId' 173 | value: managedIdentity.id 174 | } 175 | { 176 | name: 'AzureManagedIdentityClientId' 177 | value: managedIdentity.properties.clientId 178 | } 179 | { 180 | name: 'SqlConnectionString' 181 | value: sqlconnectionstringvalue 182 | } 183 | { 184 | name: 'AzureDocumentIntelligenceConnectionString' 185 | value: documentIntelligence.properties.endpoint 186 | } 187 | { 188 | name: 'AzureDocumentIntelligenceKey' 189 | value: documentIntelligence.listKeys().key1 190 | } 191 | { 192 | name: 'AzureOpenAIConnectionString' 193 | value: openAi.properties.endpoint 194 | } 195 | { 196 | name: 'AzureOpenAIKey' 197 | value: openAi.listKeys().key1 198 | } 199 | { 200 | name: 'AzureOpenAIModelDeployment' 201 | value: modelDeployment 202 | } 203 | { 204 | name: 'AzureOpenAIModelDimensions' 205 | value: modelDimensions 206 | } 207 | { 208 | name: 'AzureFunctionsJobHost__functionTimeout' 209 | value: '00:10:00' 210 | } 211 | { 212 | name: 'FUNCTIONS_WORKER_RUNTIME' 213 | value: 'dotnet-isolated' 214 | } 215 | { 216 | name: 'FUNCTIONS_EXTENSION_VERSION' 217 | value: '~4' 218 | } 219 | { 220 | name: 'WEBSITE_RUN_FROM_PACKAGE' 221 | value: '1' 222 | } 223 | { 224 | name: 'WEBSITE_RUN_FROM_PACKAGE_BLOB_MI_RESOURCE_ID' 225 | value: managedIdentity.id 226 | } 227 | { 228 | 
name: 'WEBSITE_USE_PLACEHOLDER_DOTNETISOLATED' 229 | value: '1' 230 | } 231 | { 232 | name: 'APPLICATIONINSIGHTS_CONNECTION_STRING' // setting name the Functions host reads for the Application Insights connection string 233 | value: appInsights.properties.ConnectionString 234 | } 235 | { 236 | name: 'APPINSIGHTS_INSTRUMENTATIONKEY' 237 | value: appInsights.properties.InstrumentationKey 238 | } 239 | ] 240 | } 241 | } 242 | } 243 | 244 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/functionapp.bicepparam: -------------------------------------------------------------------------------- 1 | using './functionapp.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param functionAppName = 'docingfunc' 5 | //param cosmosdbAccountName = 'docingcosmosacc' 6 | param diAccountName = 'docingdocintl' 7 | param openAIAccountName = 'docingopenaiacc' 8 | param storageAccountName = 'docingblobacc' 9 | 10 | param funcAppStorageSkuName = 'Standard_LRS' 11 | param funcAppStorageAccountName = '${functionAppName}store' 12 | param appInsightsName = '${functionAppName}insight' 13 | param appServicePlanName = '${functionAppName}service' 14 | param logAnalyticsName = '${functionAppName}log' 15 | 16 | param modelDeployment = 'text-embedding-3-large' 17 | param modelDimensions = '1536' 18 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/main.bicep: -------------------------------------------------------------------------------- 1 | // Resource params 2 | param tags object = {} 3 | 4 | @description('Base name for all resources') 5 | param baseName string 6 | 7 | // Resource names 8 | var managedIdentity_name = '${baseName}useridentity' 9 | var storage_name = '${baseName}blobacc' 10 | var function_app_name = '${baseName}funcapp' 11 | var document_intelligence_name = '${baseName}docintl' 12 | var open_ai_name = '${baseName}openai' 13 | var azuresqldb_name = '${baseName}db' 14 | var azuresqlServerName = '${baseName}server' 15 | var 
function_app_storageAccountName = '${function_app_name}store' 16 | var function_app_appInsightsName = '${function_app_name}insight' 17 | var function_app_logAnalyticsName = '${function_app_name}log' 18 | var function_app_appServicePlanName = '${function_app_name}service' 19 | 20 | // PrincipalId to be the SQL Admin 21 | @description('PrincipalId to be the SQL Admin') 22 | param userPrincipalId string 23 | 24 | // Storage params 25 | param storage_containers array = [] 26 | 27 | // Function app params 28 | param function_app_storageSkuName string 29 | 30 | // Open AI params 31 | param open_ai_deployments array 32 | param open_ai_sku string 33 | param open_ai_kind string 34 | param open_ai_format string 35 | param open_ai_publicNetworkAccess string 36 | param modelDeployment string 37 | param modelDimensions string 38 | 39 | // Document intelligence params 40 | param document_intelligence_sku object 41 | param document_intelligence_publicNetworkAccess string 42 | param document_intelligence_disableLocalAuth bool 43 | 44 | // User managed identity resource 45 | module userManagedIdentity_deployment 'userIdentity.bicep' = { 46 | name: 'userManagedIdentity_deployment' 47 | params: { 48 | managedIdentityName: managedIdentity_name 49 | } 50 | } 51 | 52 | // Storage resource 53 | module storage_deployment 'storage.bicep' = { 54 | name: 'storage_deployment' 55 | params: { 56 | name: storage_name 57 | containers: storage_containers 58 | tags: tags 59 | managedIdentityName: managedIdentity_name 60 | } 61 | dependsOn: [ 62 | userManagedIdentity_deployment 63 | ] 64 | } 65 | 66 | // Azure SQL Database resource 67 | module azuresql_deployment 'azuresql.bicep' = { 68 | name: 'azuresql_deployment' 69 | params: { 70 | managedIdentityName: managedIdentity_name 71 | azuresqldbName: azuresqldb_name 72 | azuresqlServerName: azuresqlServerName 73 | userPrincipalId: userPrincipalId 74 | tags: tags 75 | } 76 | dependsOn: [ 77 | userManagedIdentity_deployment 78 | ] 79 | } 80 | 81 | // 
Document Intelligence resource 82 | module document_intelligence_deployment 'documentintelligence.bicep' = { 83 | name: 'document_intelligence_deployment' 84 | params: { 85 | name: document_intelligence_name 86 | managedIdentityName: managedIdentity_name 87 | sku: document_intelligence_sku 88 | publicNetworkAccess: document_intelligence_publicNetworkAccess 89 | disableLocalAuth: document_intelligence_disableLocalAuth 90 | tags: tags 91 | } 92 | dependsOn: [ 93 | userManagedIdentity_deployment 94 | storage_deployment 95 | ] 96 | } 97 | 98 | // OpenAI Resource 99 | module open_ai_deployment 'openai.bicep' = { 100 | name: 'open_ai_deployment' 101 | params: { 102 | deployments: open_ai_deployments 103 | managedIdentityName: managedIdentity_name 104 | name: open_ai_name 105 | format: open_ai_format 106 | kind: open_ai_kind 107 | sku: open_ai_sku 108 | publicNetworkAccess: open_ai_publicNetworkAccess 109 | tags: tags 110 | } 111 | dependsOn: [ 112 | userManagedIdentity_deployment 113 | ] 114 | } 115 | 116 | // Function App Resource 117 | module function_app_deployment 'functionapp.bicep' = { 118 | name: 'function_app_deployment' 119 | params: { 120 | managedIdentityName: managedIdentity_name 121 | functionAppName: function_app_name 122 | funcAppStorageSkuName: function_app_storageSkuName 123 | funcAppStorageAccountName: function_app_storageAccountName 124 | appInsightsName: function_app_appInsightsName 125 | appServicePlanName: function_app_appServicePlanName 126 | logAnalyticsName: function_app_logAnalyticsName 127 | diAccountName: document_intelligence_name 128 | openAIAccountName: open_ai_name 129 | storageAccountName: storage_name 130 | modelDeployment: modelDeployment 131 | modelDimensions: modelDimensions 132 | azuresqldbName: azuresqldb_name 133 | azuresqlserverName: azuresqlServerName 134 | } 135 | dependsOn: [ 136 | userManagedIdentity_deployment 137 | storage_deployment 138 | open_ai_deployment 139 | document_intelligence_deployment 140 | azuresql_deployment 
141 | ] 142 | } 143 | 144 | // Output params 145 | // User Managed Identity and KeyVault Output Params 146 | output AZURE_USER_MANAGED_IDENTITY_NAME string = userManagedIdentity_deployment.outputs.AzureManagedIdentityName 147 | output AZURE_USER_MANAGED_IDENTITY_ID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityId 148 | output AZURE_USER_MANAGED_IDENTITY_CLIENTID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityClientId 149 | output AZURE_USER_MANAGED_IDENTITY_PRINCIPALID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityPrincipalId 150 | output AZURE_USER_MANAGED_IDENTITY_TENANTID string = userManagedIdentity_deployment.outputs.AzureManagedIdentityTenantId 151 | 152 | // Storage Params 153 | output AZURE_BLOB_STORE_ACCOUNT_NAME string = storage_deployment.outputs.AzureBlobStorageAccountName 154 | output AZURE_BLOB_STORE_ACCOUNT_ENDPOINT string = storage_deployment.outputs.AzureBlobStorageAccountEndpoint 155 | 156 | 157 | // Document Intelligence Params 158 | output AZURE_DOCUMENT_INTELLIGENCE_NAME string = document_intelligence_deployment.outputs.DocumentIntelligenceName 159 | output AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT string = document_intelligence_deployment.outputs.DocumentIntelligenceEndpoint 160 | 161 | // OpenAI 162 | output AZURE_OPEN_AI_SERVICE_NAME string = open_ai_deployment.outputs.openAIServiceName 163 | output AZURE_OPEN_AI_SERVICE_ENDPOINT string = open_ai_deployment.outputs.openAIServiceEndpoint 164 | 165 | // SQL Query 166 | output SQL_QUERY string = 'CREATE USER [${managedIdentity_name}] FROM EXTERNAL PROVIDER;ALTER ROLE db_datareader ADD MEMBER [${managedIdentity_name}];ALTER ROLE db_datawriter ADD MEMBER [${managedIdentity_name}];ALTER ROLE db_owner ADD MEMBER [${managedIdentity_name}];' 167 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/main.bicepparam: 
-------------------------------------------------------------------------------- 1 | using './main.bicep' 2 | 3 | param baseName = 'docai' 4 | 5 | // Mandatory params - NOTE(review): userPrincipalId is left empty here; presumably it must be filled in before deploying - confirm 6 | param userPrincipalId = '' 7 | 8 | // Common params 9 | param tags = {} 10 | 11 | // Storage params 12 | param storage_containers = [ 13 | { 14 | name: 'documents' 15 | } 16 | ] 17 | 18 | // Function app params 19 | param function_app_storageSkuName = 'Standard_LRS' 20 | 21 | // Document Intelligence Params 22 | param document_intelligence_sku = { 23 | name: 'S0' 24 | } 25 | param document_intelligence_publicNetworkAccess = 'Enabled' 26 | param document_intelligence_disableLocalAuth = false 27 | 28 | // Open AI params: modelDeployment is reused below as both the deployment name and the model name; modelDimensions is supplied as a string value 29 | param modelDeployment = 'text-embedding-3-large' 30 | param modelDimensions = '1536' 31 | param open_ai_deployments = [ 32 | { 33 | name: modelDeployment 34 | sku: { 35 | name: 'Standard' 36 | capacity: 10 37 | } 38 | model: { 39 | name: modelDeployment 40 | version: '1' 41 | } 42 | } 43 | ] 44 | param open_ai_sku = 'S0' 45 | param open_ai_kind = 'OpenAI' 46 | param open_ai_format = 'OpenAI' 47 | param open_ai_publicNetworkAccess = 'Enabled' 48 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/openai.bicep: -------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | 3 | // Input parameters 4 | param deployments array 5 | param name string 6 | param sku string 7 | param tags object 8 | param kind string 9 | param format string 10 | param publicNetworkAccess string 11 | 12 | // Create openAI resource; the account name is also used as its custom subdomain name, and the user-assigned managed identity is attached to the account 13 | resource openAi 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' = { 14 | name: name 15 | location: location 16 | sku: { 17 | name: sku 18 | } 19 | kind: kind 20 | properties: { 21 | customSubDomainName: name 22 | publicNetworkAccess: publicNetworkAccess 23 | } 24 | tags: tags 25 | identity: { 26 | type: 'UserAssigned' 27 |
userAssignedIdentities: { 28 | '${managedIdentity.id}': {} 29 | } 30 | } 31 | } 32 | // Create one model deployment per entry in `deployments`; @batchSize(1) makes them deploy one at a time 33 | @batchSize(1) 34 | resource openAiDeployments 'Microsoft.CognitiveServices/accounts/deployments@2024-04-01-preview' = [ 35 | for deployment in deployments: { 36 | parent: openAi 37 | name: deployment.name 38 | sku: { 39 | capacity: deployment.sku.capacity 40 | name: deployment.sku.name 41 | } 42 | properties: { 43 | model: { 44 | format: format 45 | name: deployment.model.name 46 | version: deployment.model.version 47 | } 48 | } 49 | } 50 | ] 51 | 52 | // Assign user managed identity to openai app. 53 | param managedIdentityName string 54 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 55 | name: managedIdentityName 56 | } 57 | param storage_account_id_roles array = [ // NOTE: despite the name, these role definition IDs are granted on the OpenAI account (name kept so existing callers keep working) 58 | 'a97b65f3-24c7-4388-baec-2e87135dc908' //Cognitive Services User 59 | ] 60 | // Grant each role listed above to the user-assigned managed identity, scoped to the OpenAI account 61 | resource roleAssignmentOpenAIAccount 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 62 | for id_role in storage_account_id_roles: { 63 | name: guid(resourceGroup().id, '${name}-openairole', id_role) 64 | scope: openAi 65 | properties: { 66 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 67 | principalId: managedIdentity.properties.principalId 68 | principalType: 'ServicePrincipal' // matches storage.bicep; stating the type avoids intermittent failures when the identity was only just created and has not replicated yet 69 | } 70 | } 71 | ] 72 | 73 | output openAIServiceName string = openAi.name 74 | output openAIServiceEndpoint string = openAi.properties.endpoint 75 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/openai.bicepparam: -------------------------------------------------------------------------------- 1 | using './openai.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | param name = 'docingopenaiacc' 5 | 6 | param deployments = [ 7 | { 8 | name: 'text-embedding-3-large' 9 | sku: { 10 | name: 'Standard' 11 | capacity: 40 12 | } 13 | model: { 14 | name: 'text-embedding-3-large' 15 | version: '1' 16 |
} 17 | } 18 | ] 19 | param sku = 'S0' 20 | param kind = 'OpenAI' 21 | param format = 'OpenAI' 22 | param publicNetworkAccess = 'Enabled' 23 | param tags = {} 24 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/storage.bicep: -------------------------------------------------------------------------------- 1 | param location string = resourceGroup().location 2 | 3 | // Input parameters 4 | param name string 5 | param tags object 6 | param containers array = [] 7 | 8 | // Create storage account (StorageV2, Standard_LRS) 9 | resource storage 'Microsoft.Storage/storageAccounts@2023-05-01' = { 10 | name: name 11 | location: location 12 | kind: 'StorageV2' 13 | sku: { 14 | name: 'Standard_LRS' 15 | } 16 | properties: { 17 | allowSharedKeyAccess: false // Ensure shared key access is disabled 18 | } 19 | tags: tags 20 | } 21 | 22 | // Create the 'default' blob service and one container per entry in `containers` 23 | resource blobService 'Microsoft.Storage/storageAccounts/blobServices@2023-05-01' = { 24 | parent: storage 25 | name: 'default' 26 | } 27 | 28 | resource blobContainers 'Microsoft.Storage/storageAccounts/blobServices/containers@2023-05-01' = [ 29 | for container in containers: { 30 | parent: blobService 31 | name: container.name 32 | } 33 | ] 34 | 35 | // Look up the existing user-assigned identity that will be granted permissions on the storage account 36 | param managedIdentityName string 37 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' existing = { 38 | name: managedIdentityName 39 | } 40 | 41 | // Role definition IDs assigned to the identity, in order: 42 | // Storage Account Contributor, Storage Blob Data Owner, Storage Queue Data Contributor 43 | param storage_account_id_roles array = [ 44 | '17d1049b-9a84-46fb-8f53-869881c3d3ab', 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b', '974c5e8b-45b9-4653-ba55-5f855dd0fb88' 45 | ] 46 | // Assign each role above to the managed identity; the assignments are scoped to blobService rather than to the storage account itself 47 | resource roleAssignmentStorageAccount 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ 48 | for id_role in
storage_account_id_roles: { 49 | name: guid(resourceGroup().id, '${storage.name}-storagerole', id_role) 50 | scope: blobService 51 | properties: { 52 | roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', id_role) 53 | principalId: managedIdentity.properties.principalId 54 | principalType: 'ServicePrincipal' 55 | } 56 | } 57 | ] 58 | 59 | 60 | // Output storage account name and primary blob endpoint (no connection string or key is output) 61 | output AzureBlobStorageAccountName string = storage.name 62 | output AzureBlobStorageAccountEndpoint string = storage.properties.primaryEndpoints.blob 63 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/storage.bicepparam: -------------------------------------------------------------------------------- 1 | using './storage.bicep' 2 | 3 | param name = 'docingblobacc' 4 | param managedIdentityName = 'docinguseridentity' 5 | param containers = [ 6 | { 7 | name: 'documents' 8 | } 9 | ] 10 | 11 | param tags = {} 12 | -------------------------------------------------------------------------------- /AzureSQL/csharp/deployment/userIdentity.bicep: -------------------------------------------------------------------------------- 1 | param managedIdentityName string 2 | param location string = resourceGroup().location 3 | // Create the user-assigned managed identity 4 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-07-31-preview' = { 5 | name: managedIdentityName 6 | location: location 7 | } 8 | // Expose the identity's resource id, name, client id, principal id and tenant id 9 | output AzureManagedIdentityId string = managedIdentity.id 10 | output AzureManagedIdentityName string = managedIdentity.name 11 | output AzureManagedIdentityClientId string = managedIdentity.properties.clientId 12 | output AzureManagedIdentityPrincipalId string = managedIdentity.properties.principalId 13 | output AzureManagedIdentityTenantId string = managedIdentity.properties.tenantId 14 | --------------------------------------------------------------------------------
/AzureSQL/csharp/deployment/userIdentity.bicepparam: -------------------------------------------------------------------------------- 1 | using './userIdentity.bicep' 2 | 3 | param managedIdentityName = 'docinguseridentity' 4 | -------------------------------------------------------------------------------- /AzureSQL/csharp/images/azuresql_managedidentity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/document-vector-pipeline/2b51566423b7c3a20d57a3e82f4739d1830edede/AzureSQL/csharp/images/azuresql_managedidentity.png -------------------------------------------------------------------------------- /AzureSQL/csharp/images/azuresql_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/document-vector-pipeline/2b51566423b7c3a20d57a3e82f4739d1830edede/AzureSQL/csharp/images/azuresql_pipeline.png -------------------------------------------------------------------------------- /AzureSQL/csharp/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/document-vector-pipeline/2b51566423b7c3a20d57a3e82f4739d1830edede/AzureSQL/csharp/images/pipeline.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project 2 | 3 | > This repo has been populated by an initial template to help get you started. 
Please 4 | > make sure to update the content to build a great experience for community-building. 5 | 6 | As the maintainer of this project, please make a few updates: 7 | 8 | - Improving this README.MD file to provide a great experience 9 | - Updating SUPPORT.MD with content about this project's support experience 10 | - Understanding the security reporting process in SECURITY.MD 11 | - Remove this section from the README 12 | 13 | ## Contributing 14 | 15 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 16 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 17 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 18 | 19 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 20 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 21 | provided by the bot. You will only need to do this once across all repos using our CLA. 22 | 23 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 24 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 25 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 26 | 27 | ## Trademarks 28 | 29 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 30 | trademarks or logos is subject to and must follow 31 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 32 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 33 | Any use of third-party trademarks or logos are subject to those third-party's policies. 
34 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. 
Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | --------------------------------------------------------------------------------