├── docs
├── old-tutorial
│ ├── 06-session.md
│ ├── images
│ │ ├── chat-api.png
│ │ ├── stream-response.gif
│ │ ├── upload-function.png
│ │ ├── chat-final-result.gif
│ │ ├── post-request-chat.png
│ │ ├── chat-stream-response.png
│ │ ├── from-messages-method.png
│ │ └── test-upload-function.gif
│ ├── 02-session.md
│ ├── 04-session.md
│ ├── 01-session.md
│ └── 05-session.md
├── tutorial
│ ├── 05-config-files-app.md
│ ├── images
│ │ ├── rag.png
│ │ ├── deployed-app.png
│ │ ├── ollama-page.png
│ │ ├── mistral-7b-page.png
│ │ ├── application-webapp.png
│ │ ├── azure-openai-page.png
│ │ ├── ollama-mistra-cli.png
│ │ ├── function-chat-error.png
│ │ ├── init-functions-project.png
│ │ ├── services-azure-portal.png
│ │ └── azure-functions-project-structure.png
│ ├── 03-understanding-rag.md
│ ├── 01-introduction.md
│ └── 04-preparing-understanding-language-models.md
├── images
│ ├── demo.gif
│ ├── rag.png
│ ├── azd-up.png
│ ├── clone-url.png
│ ├── codespaces.png
│ ├── architecture.drawio.png
│ └── architecture-local.drawio.png
├── cost.md
├── enhance-security.md
├── troubleshooting.md
├── faq.md
└── readme.md
├── packages
├── webapp
│ ├── src
│ │ ├── vite-env.d.ts
│ │ ├── index.ts
│ │ ├── api.ts
│ │ └── message-parser.ts
│ ├── public
│ │ ├── favicon.png
│ │ └── staticwebapp.config.json
│ ├── assets
│ │ ├── panel.svg
│ │ ├── send.svg
│ │ ├── delete.svg
│ │ ├── new-chat.svg
│ │ └── question.svg
│ ├── README.md
│ ├── package.json
│ ├── vite.config.ts
│ ├── tsconfig.json
│ └── index.html
└── api
│ ├── local.settings.json
│ ├── .funcignore
│ ├── src
│ ├── constants.ts
│ ├── http-response.ts
│ ├── security.ts
│ └── functions
│ │ ├── chats-delete.ts
│ │ ├── chats-get.ts
│ │ ├── documents-get.ts
│ │ ├── documents-post.ts
│ │ └── chats-post.ts
│ ├── host.json
│ ├── tsconfig.json
│ ├── .env.sample
│ ├── README.md
│ ├── api.http
│ ├── package.json
│ └── .gitignore
├── .vscode
├── extensions.json
├── launch.json
├── settings.json
└── tasks.json
├── data
├── support.pdf
├── privacy-policy.pdf
├── terms-of-service.pdf
└── README.md
├── .gitignore
├── .editorconfig
├── .github
├── CODE_OF_CONDUCT.md
├── workflows
│ ├── stale-bot.yaml
│ ├── validate-infra.yaml
│ ├── build-test.yaml
│ └── azure-dev.yaml
├── ISSUE_TEMPLATE.md
├── PULL_REQUEST_TEMPLATE.md
├── SECURITY.md
└── CONTRIBUTING.md
├── infra
├── app
│ ├── linked-backend.bicep
│ ├── api.bicep
│ └── vnet.bicep
├── core
│ ├── host
│ │ ├── staticwebapp.bicep
│ │ ├── appserviceplan.bicep
│ │ ├── appservice-appsettings.bicep
│ │ ├── functions.bicep
│ │ ├── appservice.bicep
│ │ └── functions-flex.bicep
│ ├── monitor
│ │ ├── loganalytics.bicep
│ │ ├── applicationinsights.bicep
│ │ └── monitoring.bicep
│ ├── database
│ │ └── cosmos
│ │ │ └── sql
│ │ │ ├── cosmos-sql-role-assign.bicep
│ │ │ └── cosmos-sql-role-def.bicep
│ ├── security
│ │ └── role.bicep
│ ├── ai
│ │ └── cognitiveservices.bicep
│ └── storage
│ │ └── storage-account.bicep
├── main.parameters.json
├── abbreviations.json
└── main.bicep
├── SUPPORT.md
├── azure.yaml
├── LICENSE
├── .devcontainer
└── devcontainer.json
├── scripts
└── upload-documents.js
├── AGENTS.md
└── package.json
/docs/old-tutorial/06-session.md:
--------------------------------------------------------------------------------
1 | # `get-documents.ts`
2 |
3 | to-do
4 |
--------------------------------------------------------------------------------
/packages/webapp/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | ///
2 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": ["ms-azuretools.vscode-azurefunctions"]
3 | }
4 |
--------------------------------------------------------------------------------
/data/support.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/support.pdf
--------------------------------------------------------------------------------
/docs/tutorial/05-config-files-app.md:
--------------------------------------------------------------------------------
1 | # Preparing the Configuration Files for the `chat` API
2 |
3 | **todo**
4 |
--------------------------------------------------------------------------------
/docs/images/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/demo.gif
--------------------------------------------------------------------------------
/docs/images/rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/rag.png
--------------------------------------------------------------------------------
/data/privacy-policy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/privacy-policy.pdf
--------------------------------------------------------------------------------
/docs/images/azd-up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/azd-up.png
--------------------------------------------------------------------------------
/data/terms-of-service.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/terms-of-service.pdf
--------------------------------------------------------------------------------
/docs/images/clone-url.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/clone-url.png
--------------------------------------------------------------------------------
/docs/images/codespaces.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/codespaces.png
--------------------------------------------------------------------------------
/docs/tutorial/images/rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/rag.png
--------------------------------------------------------------------------------
/docs/images/architecture.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/architecture.drawio.png
--------------------------------------------------------------------------------
/packages/webapp/public/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/packages/webapp/public/favicon.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/chat-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-api.png
--------------------------------------------------------------------------------
/docs/tutorial/images/deployed-app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/deployed-app.png
--------------------------------------------------------------------------------
/docs/tutorial/images/ollama-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/ollama-page.png
--------------------------------------------------------------------------------
/docs/tutorial/images/mistral-7b-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/mistral-7b-page.png
--------------------------------------------------------------------------------
/docs/images/architecture-local.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/architecture-local.drawio.png
--------------------------------------------------------------------------------
/docs/tutorial/images/application-webapp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/application-webapp.png
--------------------------------------------------------------------------------
/docs/tutorial/images/azure-openai-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/azure-openai-page.png
--------------------------------------------------------------------------------
/docs/tutorial/images/ollama-mistra-cli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/ollama-mistra-cli.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/stream-response.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/stream-response.gif
--------------------------------------------------------------------------------
/docs/old-tutorial/images/upload-function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/upload-function.png
--------------------------------------------------------------------------------
/docs/tutorial/images/function-chat-error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/function-chat-error.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/chat-final-result.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-final-result.gif
--------------------------------------------------------------------------------
/docs/old-tutorial/images/post-request-chat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/post-request-chat.png
--------------------------------------------------------------------------------
/docs/tutorial/images/init-functions-project.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/init-functions-project.png
--------------------------------------------------------------------------------
/docs/tutorial/images/services-azure-portal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/services-azure-portal.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/chat-stream-response.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-stream-response.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/from-messages-method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/from-messages-method.png
--------------------------------------------------------------------------------
/docs/old-tutorial/images/test-upload-function.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/test-upload-function.gif
--------------------------------------------------------------------------------
/packages/webapp/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from './api.js';
2 | export * from './components/chat.js';
3 | export * from './components/history.js';
4 | export * from './message-parser.js';
5 |
--------------------------------------------------------------------------------
/docs/tutorial/images/azure-functions-project-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/azure-functions-project-structure.png
--------------------------------------------------------------------------------
/packages/api/local.settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "IsEncrypted": false,
3 | "Values": {
4 | "FUNCTIONS_WORKER_RUNTIME": "node",
5 | "AzureWebJobsFeatureFlags": "EnableWorkerIndexing"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/packages/webapp/public/staticwebapp.config.json:
--------------------------------------------------------------------------------
1 | {
2 | "trailingSlash": "auto",
3 | "navigationFallback": {
4 | "rewrite": "index.html",
5 | "exclude": ["/assets/*", "*.css"]
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/packages/api/.funcignore:
--------------------------------------------------------------------------------
1 | *.js.map
2 | .git*
3 | .vscode
4 | __azurite_db*__.json
5 | __blobstorage__
6 | __queuestorage__
7 | node_modules/
8 | local.settings.json
9 | test
10 | .faiss
11 | api.http
12 | .env.sample
13 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Attach to Node Functions",
6 | "type": "node",
7 | "request": "attach",
8 | "port": 9229,
9 | "preLaunchTask": "func: host start"
10 | }
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled output
2 | node_modules/
3 | dist/
4 | .tmp/
5 |
6 | # Logs
7 | logs
8 | *.log
9 | npm-debug.log*
10 | pnpm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 |
14 | # Deployment
15 | *.env
16 | .azure
17 |
18 | # Misc
19 | .DS_Store
20 | Thumbs.db
21 | TODO
22 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # Editor configuration, see http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | indent_style = space
8 | indent_size = 2
9 | insert_final_newline = true
10 | trim_trailing_whitespace = true
11 |
12 | [*.md]
13 | max_line_length = off
14 | trim_trailing_whitespace = false
15 |
--------------------------------------------------------------------------------
/packages/api/src/constants.ts:
--------------------------------------------------------------------------------
1 | // Ollama models configuration
2 | // You can see the complete list of available models at https://ollama.ai/models
3 | export const ollamaEmbeddingsModel = 'nomic-embed-text:latest';
4 | export const ollamaChatModel = 'llama3.1:latest';
5 |
6 | // Faiss local store folder
7 | export const faissStoreFolder = '.faiss';
8 |
--------------------------------------------------------------------------------
/packages/api/host.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0",
3 | "logging": {
4 | "applicationInsights": {
5 | "samplingSettings": {
6 | "isEnabled": true,
7 | "excludedTypes": "Request"
8 | }
9 | }
10 | },
11 | "extensionBundle": {
12 | "id": "Microsoft.Azure.Functions.ExtensionBundle",
13 | "version": "[4.*, 5.0.0)"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/packages/webapp/assets/panel.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "azureFunctions.deploySubpath": "packages/api",
3 | "azureFunctions.postDeployTask": "npm install (functions)",
4 | "azureFunctions.projectLanguage": "TypeScript",
5 | "azureFunctions.projectRuntime": "~4",
6 | "debug.internalConsoleOptions": "neverOpen",
7 | "azureFunctions.projectLanguageModel": 3,
8 | "azureFunctions.projectSubpath": "packages/api",
9 | "azureFunctions.preDeployTask": "npm prune (functions)"
10 | }
11 |
--------------------------------------------------------------------------------
/packages/api/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "module": "CommonJS",
4 | "target": "ESNext",
5 | "incremental": true,
6 | "composite": true,
7 | "skipLibCheck": true,
8 | "forceConsistentCasingInFileNames": true,
9 | "outDir": "dist",
10 | "rootDir": ".",
11 | "sourceMap": true,
12 | "strict": true,
13 | "moduleResolution": "node",
14 | "esModuleInterop": true,
15 | "lib": ["ESNext"]
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/packages/webapp/README.md:
--------------------------------------------------------------------------------
1 | # Chat webapp
2 |
3 | This project uses [Vite](https://vitejs.dev/) as a frontend build tool, and [Lit](https://lit.dev/) as a web components library.
4 |
5 | ## Available Scripts
6 |
7 | In the project directory, you can run:
8 |
9 | ### `npm run dev`
10 |
11 | To start the app in dev mode.
12 | Open [http://localhost:8000](http://localhost:8000) to view it in the browser.
13 |
14 | ### `npm run build`
15 |
16 | To build the app for production to the `dist` folder.
17 |
--------------------------------------------------------------------------------
/infra/app/linked-backend.bicep:
--------------------------------------------------------------------------------
1 | param staticWebAppName string
2 | param backendResourceId string
3 | param backendLocation string
4 |
5 | resource staticWebApp 'Microsoft.Web/staticSites@2023-12-01' existing = {
6 | name: staticWebAppName
7 | }
8 |
9 | resource linkedStaticWebAppBackend 'Microsoft.Web/staticSites/linkedBackends@2023-12-01' = {
10 | parent: staticWebApp
11 | name: 'linkedBackend'
12 | properties: {
13 | backendResourceId: backendResourceId
14 | region: backendLocation
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # Data disclaimer
2 |
3 | The documents used in this sample contain information generated using a language model (Azure OpenAI Service). The information contained in these documents is only for demonstration purposes and does not reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, about the completeness, accuracy, reliability, suitability or availability with respect to the information contained in this document. All rights reserved to Microsoft.
4 |
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # Support
2 |
3 | ## How to file issues and get help
4 |
5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
7 | feature request as a new Issue.
8 |
9 | For help and questions about using this project, please use GitHub Issues and tag them with the
10 | **question** label.
11 |
12 | ## Microsoft Support Policy
13 |
14 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
15 |
--------------------------------------------------------------------------------
/packages/api/.env.sample:
--------------------------------------------------------------------------------
1 | # Azure OpenAI configuration
2 | AZURE_OPENAI_API_ENDPOINT=""
3 | AZURE_OPENAI_API_KEY=""
4 | AZURE_OPENAI_API_DEPLOYMENT_NAME=""
5 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME=""
6 |
7 | # Azure CosmosDB for MongoDB vCore configuration
8 | AZURE_COSMOSDB_CONNECTION_STRING=""
9 |
10 | # Azure Blob Storage configuration
11 | AZURE_STORAGE_CONNECTION_STRING=""
12 | AZURE_STORAGE_CONTAINER_NAME=""
13 |
--------------------------------------------------------------------------------
/infra/core/host/staticwebapp.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure Static Web Apps instance.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | param sku object = {
7 | name: 'Free'
8 | tier: 'Free'
9 | }
10 |
11 | resource web 'Microsoft.Web/staticSites@2023-12-01' = {
12 | name: name
13 | location: location
14 | tags: tags
15 | sku: sku
16 | properties: {
17 | provider: 'Custom'
18 | }
19 | }
20 |
21 | output name string = web.name
22 | output uri string = 'https://${web.properties.defaultHostname}'
23 |
--------------------------------------------------------------------------------
/infra/core/host/appserviceplan.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure App Service plan.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | param kind string = ''
7 | param reserved bool = true
8 | param sku object
9 |
10 | resource appServicePlan 'Microsoft.Web/serverfarms@2023-12-01' = {
11 | name: name
12 | location: location
13 | tags: tags
14 | sku: sku
15 | kind: kind
16 | properties: {
17 | reserved: reserved
18 | }
19 | }
20 |
21 | output id string = appServicePlan.id
22 | output name string = appServicePlan.name
23 |
--------------------------------------------------------------------------------
/infra/core/host/appservice-appsettings.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Updates app settings for an Azure App Service.'
2 | @description('The name of the app service resource within the current resource group scope')
3 | param name string
4 |
5 | @description('The app settings to be applied to the app service')
6 | @secure()
7 | param appSettings object
8 |
9 | resource appService 'Microsoft.Web/sites@2022-03-01' existing = {
10 | name: name
11 | }
12 |
13 | resource settings 'Microsoft.Web/sites/config@2022-03-01' = {
14 | name: 'appsettings'
15 | parent: appService
16 | properties: appSettings
17 | }
18 |
--------------------------------------------------------------------------------
/infra/core/monitor/loganalytics.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates a Log Analytics workspace.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = {
7 | name: name
8 | location: location
9 | tags: tags
10 | properties: any({
11 | retentionInDays: 30
12 | features: {
13 | searchVersion: 1
14 | }
15 | sku: {
16 | name: 'PerGB2018'
17 | }
18 | })
19 | }
20 |
21 | output id string = logAnalytics.id
22 | output name string = logAnalytics.name
23 |
--------------------------------------------------------------------------------
/packages/webapp/assets/send.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/webapp/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "webapp",
3 | "version": "1.0.0",
4 | "description": "Web app for the serverless AI Chat RAG sample",
5 | "private": true,
6 | "type": "module",
7 | "scripts": {
8 | "dev": "vite --port 8000 --host",
9 | "build": "vite build",
10 | "watch": "vite build --watch --minify false",
11 | "clean": "npx rimraf dist"
12 | },
13 | "author": "Microsoft",
14 | "license": "MIT",
15 | "dependencies": {
16 | "@microsoft/ai-chat-protocol": "^1.0.0-beta.20240814.1",
17 | "lit": "^3.0.0"
18 | },
19 | "devDependencies": {
20 | "vite": "^6.0.2"
21 | },
22 | "files": [
23 | "dist"
24 | ]
25 | }
26 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-role-assign.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates a SQL role assignment under an Azure Cosmos DB account.'
2 | param accountName string
3 |
4 | param roleDefinitionId string
5 | param principalId string = ''
6 |
7 | resource role 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2022-05-15' = {
8 | parent: cosmos
9 | name: guid(roleDefinitionId, principalId, cosmos.id)
10 | properties: {
11 | principalId: principalId
12 | roleDefinitionId: roleDefinitionId
13 | scope: cosmos.id
14 | }
15 | }
16 |
17 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = {
18 | name: accountName
19 | }
20 |
--------------------------------------------------------------------------------
/infra/core/security/role.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates a role assignment for a service principal.'
2 | param principalId string
3 |
4 | @allowed([
5 | 'Device'
6 | 'ForeignGroup'
7 | 'Group'
8 | 'ServicePrincipal'
9 | 'User'
10 | ])
11 | param principalType string = 'ServicePrincipal'
12 | param roleDefinitionId string
13 |
14 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
15 | name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId)
16 | properties: {
17 | principalId: principalId
18 | principalType: principalType
19 | roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId)
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/packages/webapp/vite.config.ts:
--------------------------------------------------------------------------------
1 | import process from 'node:process';
2 | import { defineConfig } from 'vite';
3 |
4 | // Expose environment variables to the client
5 | process.env.VITE_API_URL = process.env.API_URL ?? '';
6 | console.log(`Using chat API base URL: "${process.env.VITE_API_URL}"`);
7 |
8 | export default defineConfig({
9 | build: {
10 | outDir: './dist',
11 | emptyOutDir: true,
12 | sourcemap: true,
13 | rollupOptions: {
14 | output: {
15 | manualChunks(id) {
16 | if (id.includes('node_modules')) {
17 | return 'vendor';
18 | }
19 | },
20 | },
21 | },
22 | },
23 | server: {
24 | proxy: {
25 | '/api': 'http://127.0.0.1:7071',
26 | },
27 | },
28 | });
29 |
--------------------------------------------------------------------------------
/packages/api/README.md:
--------------------------------------------------------------------------------
1 | # Azure Functions API
2 |
3 | This project uses [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) as a serverless API, and [LangChain.js](https://js.langchain.com/) to implement the AI capabilities.
4 |
5 | ## Available Scripts
6 |
7 | In the project directory, you can run:
8 |
9 | ### `npm start`
10 |
11 | This command will start the API in dev mode, and you will be able to access it through the URL `http://localhost:7071/api/`.
12 |
13 | You can use the `api.http` file to test the API using the [REST Client](https://marketplace.visualstudio.com/items?itemName=humao.rest-client) extension for Visual Studio Code.
14 |
15 | ### `npm run build`
16 |
17 | To build the API for production to the `dist` folder.
18 |
--------------------------------------------------------------------------------
/packages/webapp/assets/delete.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.github/workflows/stale-bot.yaml:
--------------------------------------------------------------------------------
1 | name: Close stale issues and PRs
2 | on:
3 | schedule:
4 | - cron: '30 1 * * *'
5 |
6 | jobs:
7 | stale:
8 | runs-on: ubuntu-latest
9 | permissions:
10 | contents: write
11 | issues: write
12 | pull-requests: write
13 | steps:
14 | - uses: actions/stale@v9
15 | with:
16 | stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this issue will be closed.'
17 | stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed.'
18 | close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.'
19 | close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.'
20 | days-before-issue-stale: 60
21 | days-before-pr-stale: 60
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
4 |
5 | > ## Please provide us with the following information:
6 |
7 | ### This issue is for a: (mark with an `x`)
8 |
9 | ```
10 | - [ ] bug report -> please search issues before submitting
11 | - [ ] feature request
12 | - [ ] documentation issue or request
13 | - [ ] regression (a behavior that used to work and stopped in a new release)
14 | ```
15 |
16 | ### Minimal steps to reproduce
17 |
18 | >
19 |
20 | ### Any log messages given by the failure
21 |
22 | >
23 |
24 | ### Expected/desired behavior
25 |
26 | >
27 |
28 | ### OS and Version?
29 |
30 | > Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?)
31 |
32 | ### Versions
33 |
34 | >
35 |
36 | ### Mention any other details that might be useful
37 |
38 | > ---
39 | >
40 | > Thanks! We'll be in touch soon.
41 |
--------------------------------------------------------------------------------
/infra/core/database/cosmos/sql/cosmos-sql-role-def.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates a SQL role definition under an Azure Cosmos DB account.'
2 | param accountName string
3 |
4 | resource roleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2022-08-15' = {
5 | parent: cosmos
6 | name: guid(cosmos.id, accountName, 'sql-role')
7 | properties: {
8 | assignableScopes: [
9 | cosmos.id
10 | ]
11 | permissions: [
12 | {
13 | dataActions: [
14 | 'Microsoft.DocumentDB/databaseAccounts/readMetadata'
15 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/items/*'
16 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/*'
17 | ]
18 | notDataActions: []
19 | }
20 | ]
21 | roleName: 'Reader Writer'
22 | type: 'CustomRole'
23 | }
24 | }
25 |
26 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = {
27 | name: accountName
28 | }
29 |
30 | output id string = roleDefinition.id
31 |
--------------------------------------------------------------------------------
/packages/api/src/http-response.ts:
--------------------------------------------------------------------------------
1 | import { HttpResponseInit } from '@azure/functions';
2 |
3 | export function badRequest(message: string): HttpResponseInit {
4 | return {
5 | status: 400,
6 | jsonBody: {
7 | error: message,
8 | },
9 | };
10 | }
11 |
12 | export function notFound(message: string): HttpResponseInit {
13 | return {
14 | status: 404,
15 | jsonBody: {
16 | error: message,
17 | },
18 | };
19 | }
20 |
21 | export function serviceUnavailable(message: string): HttpResponseInit {
22 | return {
23 | status: 503,
24 | jsonBody: {
25 | error: message,
26 | },
27 | };
28 | }
29 |
30 | export function ok(body?: object): HttpResponseInit {
31 | return body
32 | ? {
33 | status: 200,
34 | jsonBody: body,
35 | }
36 | : { status: 204 };
37 | }
38 |
39 | export function data(body: Uint8Array | AsyncIterable, headers: Record): HttpResponseInit {
40 | return {
41 | status: 200,
42 | headers,
43 | body,
44 | };
45 | }
46 |
--------------------------------------------------------------------------------
/packages/webapp/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "esnext",
4 | "module": "esnext",
5 | "lib": ["esnext", "DOM", "DOM.Iterable"],
6 | "strict": true,
7 | "outDir": "./dist",
8 | "rootDir": "./src",
9 | "declaration": true,
10 | "declarationMap": true,
11 | "sourceMap": true,
12 | "inlineSources": true,
13 | "noUnusedLocals": true,
14 | "noUnusedParameters": true,
15 | "noImplicitReturns": true,
16 | "noFallthroughCasesInSwitch": true,
17 | "noImplicitAny": false,
18 | "noImplicitThis": true,
19 | "moduleResolution": "node",
20 | "allowSyntheticDefaultImports": true,
21 | "experimentalDecorators": true,
22 | "forceConsistentCasingInFileNames": true,
23 | "noImplicitOverride": true,
24 | "emitDeclarationOnly": true,
25 | "useDefineForClassFields": false,
26 | "plugins": [
27 | {
28 | "name": "ts-lit-plugin",
29 | "strict": true
30 | }
31 | ]
32 | },
33 | "include": ["src/**/*.ts"],
34 | "types": ["vite/client"]
35 | }
36 |
--------------------------------------------------------------------------------
/azure.yaml:
--------------------------------------------------------------------------------
1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
2 |
3 | name: serverless-chat-langchainjs
4 | metadata:
5 | template: serverless-chat-langchainjs@1.0.0
6 |
7 | services:
8 | webapp:
9 | project: ./packages/webapp
10 | dist: dist
11 | language: ts
12 | host: staticwebapp
13 | hooks:
14 | predeploy:
15 | windows:
16 | shell: pwsh
17 | run: npm run build
18 | posix:
19 | shell: sh
20 | run: npm run build
21 |
22 | api:
23 | project: ./packages/api
24 | language: ts
25 | host: function
26 |
27 | hooks:
28 | postprovision:
29 | windows:
30 | shell: pwsh
31 | run: azd env get-values > packages/api/.env
32 | posix:
33 | shell: sh
34 | run: azd env get-values > packages/api/.env
35 | postup:
36 | windows:
37 | shell: pwsh
38 | run: node scripts/upload-documents.js "$env:UPLOAD_URL"
39 | posix:
40 | shell: sh
41 | run: node scripts/upload-documents.js "$UPLOAD_URL"
42 |
--------------------------------------------------------------------------------
/.github/workflows/validate-infra.yaml:
--------------------------------------------------------------------------------
1 | name: Validate AZD template
2 | on:
3 | push:
4 | branches: [main]
5 | paths:
6 | - 'infra/**'
7 | pull_request:
8 | branches: [main]
9 | paths:
10 | - 'infra/**'
11 |
12 | jobs:
13 | build:
14 | runs-on: ubuntu-latest
15 | permissions:
16 | security-events: write
17 | steps:
18 | - name: Checkout
19 | uses: actions/checkout@v4
20 |
21 | - name: Build Bicep for linting
22 | uses: azure/CLI@v2
23 | with:
24 | inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout
25 |
26 | - name: Run Microsoft Security DevOps Analysis
27 | uses: microsoft/security-devops-action@preview
28 | id: msdo
29 | continue-on-error: true
30 | with:
31 | tools: templateanalyzer
32 |
33 | - name: Upload alerts to Security tab
34 | if: github.repository_owner == 'Azure-Samples'
35 | uses: github/codeql-action/upload-sarif@v3
36 | with:
37 | sarif_file: ${{ steps.msdo.outputs.sarifFile }}
38 |
--------------------------------------------------------------------------------
/packages/webapp/src/api.ts:
--------------------------------------------------------------------------------
1 | import { AIChatMessage, AIChatCompletionDelta, AIChatProtocolClient } from '@microsoft/ai-chat-protocol';
2 |
3 | export const apiBaseUrl: string = import.meta.env.VITE_API_URL || '';
4 |
5 | export type ChatRequestOptions = {
6 | messages: AIChatMessage[];
7 | context?: Record;
8 | chunkIntervalMs: number;
9 | apiUrl: string;
10 | };
11 |
12 | export async function* getCompletion(options: ChatRequestOptions) {
13 | const apiUrl = options.apiUrl || apiBaseUrl;
14 | const client = new AIChatProtocolClient(`${apiUrl}/api/chats`);
15 | const result = await client.getStreamedCompletion(options.messages, { context: options.context });
16 |
17 | for await (const response of result) {
18 | if (!response.delta) {
19 | continue;
20 | }
21 |
22 | yield new Promise((resolve) => {
23 | setTimeout(() => {
24 | resolve(response);
25 | }, options.chunkIntervalMs);
26 | });
27 | }
28 | }
29 |
30 | export function getCitationUrl(citation: string): string {
31 | return `${apiBaseUrl}/api/documents/${citation}`;
32 | }
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE
--------------------------------------------------------------------------------
/infra/core/monitor/applicationinsights.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Application Insights instance based on an existing Log Analytics workspace.'
2 | param name string
3 | param dashboardName string = ''
4 | param location string = resourceGroup().location
5 | param tags object = {}
6 | param logAnalyticsWorkspaceId string
7 |
8 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = {
9 | name: name
10 | location: location
11 | tags: tags
12 | kind: 'web'
13 | properties: {
14 | Application_Type: 'web'
15 | WorkspaceResourceId: logAnalyticsWorkspaceId
16 | }
17 | }
18 |
19 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if (!empty(dashboardName)) {
20 | name: 'application-insights-dashboard'
21 | params: {
22 | name: dashboardName
23 | location: location
24 | applicationInsightsName: applicationInsights.name
25 | }
26 | }
27 |
28 | output connectionString string = applicationInsights.properties.ConnectionString
29 | output id string = applicationInsights.id
30 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey
31 | output name string = applicationInsights.name
32 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Purpose
2 |
3 |
4 |
5 | - ...
6 |
7 | ## Does this introduce a breaking change?
8 |
9 |
10 |
11 | ```
12 | [ ] Yes
13 | [ ] No
14 | ```
15 |
16 | ## Pull Request Type
17 |
18 | What kind of change does this Pull Request introduce?
19 |
20 |
21 |
22 | ```
23 | [ ] Bugfix
24 | [ ] Feature
25 | [ ] Code style update (formatting, local variables)
26 | [ ] Refactoring (no functional changes, no api changes)
27 | [ ] Documentation content changes
28 | [ ] Other... Please describe:
29 | ```
30 |
31 | ## How to Test
32 |
33 | - Get the code
34 |
35 | ```
36 | git clone [repo-address]
37 | cd [repo-name]
38 | git checkout [branch-name]
39 | npm install
40 | ```
41 |
42 | - Test the code
43 |
44 |
45 | ```
46 |
47 | ```
48 |
49 | ## What to Check
50 |
51 | Verify that the following are valid
52 |
53 | - ...
54 |
55 | ## Other Information
56 |
57 |
58 |
--------------------------------------------------------------------------------
/.github/workflows/build-test.yaml:
--------------------------------------------------------------------------------
1 | name: Build and test
2 | on:
3 | push:
4 | branches: [main]
5 | pull_request:
6 | branches: [main]
7 |
8 | jobs:
9 | build_test:
10 | strategy:
11 | matrix:
12 | platform: [ubuntu-latest, macos-latest, windows-latest]
13 | node-version: ['20', '22']
14 |
15 | name: ${{ matrix.platform }} / Node.js v${{ matrix.node-version }}
16 | runs-on: ${{ matrix.platform }}
17 | steps:
18 | - run: git config --global core.autocrlf false # Preserve line endings
19 | - uses: actions/checkout@v4
20 | - name: Setup Node.js v${{ matrix.node-version }}
21 | uses: actions/setup-node@v4
22 | with:
23 | node-version: ${{ matrix.node-version }}
24 | - name: Install dependencies
25 | run: npm ci
26 | - name: Build packages
27 | run: npm run build
28 | - name: Lint packages
29 | run: npm run lint
30 | - name: Test packages
31 | run: npm test --if-present
32 |
33 | build_test_all:
34 | if: always()
35 | runs-on: ubuntu-latest
36 | needs: build_test
37 | steps:
38 | - name: Check build matrix status
39 | if: ${{ needs.build_test.result != 'success' }}
40 | run: exit 1
41 |
--------------------------------------------------------------------------------
/packages/webapp/assets/new-chat.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/webapp/assets/question.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/api/api.http:
--------------------------------------------------------------------------------
1 | ##################################################################
2 | # VS Code with REST Client extension is needed to use this file.
3 | # Download at: https://aka.ms/vscode/rest-client
4 | ##################################################################
5 |
6 | @api_host = http://localhost:7071
7 |
8 | ### Upload PDF Document
9 | POST {{api_host}}/api/documents
10 | Accept: */*
11 | Content-Type: multipart/form-data; boundary=Boundary
12 |
13 | --Boundary
14 | Content-Disposition: form-data; name="file"; filename="support.pdf"
15 | Content-Type: application/pdf
16 |
17 | < ../../data/support.pdf
18 | --Boundary--
19 |
20 | ### Retrieve PDF document
21 | GET {{api_host}}/api/documents/support.pdf
22 |
23 | ### Chat with the bot
24 | POST {{api_host}}/api/chats/stream?userId=1
25 | Content-Type: application/json
26 |
27 | {
28 | "messages": [
29 | {
30 | "content": "How to Search and Book Rentals?",
31 | "role": "user"
32 | }
33 | ],
34 | "context": {
35 | "sessionId": "123"
36 | }
37 | }
38 |
39 | ### Retrieve all chat sessions
40 | GET {{api_host}}/api/chats?userId=1
41 |
42 | ### Retrieve a chat session
43 | GET {{api_host}}/api/chats/123?userId=1
44 |
45 | ### Delete a chat session
46 | DELETE {{api_host}}/api/chats/123?userId=1
47 |
--------------------------------------------------------------------------------
/docs/cost.md:
--------------------------------------------------------------------------------
1 | ## Cost estimation
2 |
3 | Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage.
4 | However, you can use the [Azure pricing calculator](https://azure.com/e/aa7deadafa0f4980a91308de010299bc) for the resources below to get an estimate.
5 |
6 | - Azure Functions: Consumption plan, Free for the first 1M executions. Pricing per execution and memory used. [Pricing](https://azure.microsoft.com/pricing/details/functions/)
7 | - Azure Static Web Apps: Free tier, 100GB bandwidth. Pricing per GB served. [Pricing](https://azure.microsoft.com/pricing/details/app-service/static/)
8 | - Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/)
9 | - Azure Cosmos DB: Serverless tier. Pricing per request unit (RU). [Pricing](https://azure.microsoft.com/pricing/details/cosmos-db/autoscale-provisioned/)
10 | - Azure Blob Storage: Standard tier with LRS. Pricing per GB stored and data transfer. [Pricing](https://azure.microsoft.com/pricing/details/storage/blobs/)
11 |
12 | ⚠️ To avoid unnecessary costs, remember to take down your app if it's no longer in use,
13 | either by deleting the resource group in the Portal or running `azd down --purge`.
14 |
--------------------------------------------------------------------------------
/packages/api/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "api",
3 | "version": "1.0.0",
4 | "description": "Azure Functions API for the serverless AI Chat RAG sample",
5 | "main": "dist/src/functions/*.js",
6 | "scripts": {
7 | "build": "tsc",
8 | "watch": "tsc -w",
9 | "clean": "rimraf dist",
10 | "prestart": "npm run clean && npm run build",
11 | "start:host": "func start",
12 | "start": "concurrently npm:start:host npm:watch --raw --kill-others"
13 | },
14 | "author": "Microsoft",
15 | "license": "MIT",
16 | "dependencies": {
17 | "@azure/functions": "^4.0.0",
18 | "@azure/identity": "^4.2.0",
19 | "@azure/search-documents": "^12.0.0",
20 | "@azure/storage-blob": "^12.17.0",
21 | "@langchain/azure-cosmosdb": "^0.2.2",
22 | "@langchain/community": "^0.3.15",
23 | "@langchain/core": "^0.3.18",
24 | "@langchain/ollama": "^0.2.0",
25 | "@langchain/openai": "^0.5.10",
26 | "@langchain/textsplitters": "^0.1.0",
27 | "@microsoft/ai-chat-protocol": "^1.0.0-beta.20240814.1",
28 | "dotenv": "^16.4.5",
29 | "faiss-node": "^0.5.1",
30 | "langchain": "^0.3.6",
31 | "pdf-parse": "^1.1.1",
32 | "uuid": "^11.0.3"
33 | },
34 | "devDependencies": {
35 | "@types/node": "^20.x",
36 | "azure-functions-core-tools": "^4.0.5611",
37 | "typescript": "^5.4.4"
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/infra/core/monitor/monitoring.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Application Insights instance and a Log Analytics workspace.'
2 | param logAnalyticsName string
3 | param applicationInsightsName string
4 | param applicationInsightsDashboardName string = ''
5 | param location string = resourceGroup().location
6 | param tags object = {}
7 |
8 | module logAnalytics 'loganalytics.bicep' = {
9 | name: 'loganalytics'
10 | params: {
11 | name: logAnalyticsName
12 | location: location
13 | tags: tags
14 | }
15 | }
16 |
17 | module applicationInsights 'applicationinsights.bicep' = {
18 | name: 'applicationinsights'
19 | params: {
20 | name: applicationInsightsName
21 | location: location
22 | tags: tags
23 | dashboardName: applicationInsightsDashboardName
24 | logAnalyticsWorkspaceId: logAnalytics.outputs.id
25 | }
26 | }
27 |
28 | output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString
29 | output applicationInsightsId string = applicationInsights.outputs.id
30 | output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey
31 | output applicationInsightsName string = applicationInsights.outputs.name
32 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id
33 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name
34 |
--------------------------------------------------------------------------------
/packages/api/src/security.ts:
--------------------------------------------------------------------------------
1 | import { HttpRequest } from '@azure/functions';
2 | import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity';
3 |
4 | const azureOpenAiScope = 'https://cognitiveservices.azure.com/.default';
5 |
6 | let credentials: DefaultAzureCredential | undefined;
7 |
8 | export function getCredentials(): DefaultAzureCredential {
9 | // Use the current user identity to authenticate.
10 | // No secrets needed, it uses `az login` or `azd auth login` locally,
11 | // and managed identity when deployed on Azure.
12 | credentials ||= new DefaultAzureCredential();
13 | return credentials;
14 | }
15 |
16 | export function getAzureOpenAiTokenProvider() {
17 | return getBearerTokenProvider(getCredentials(), azureOpenAiScope);
18 | }
19 |
20 | export function getUserId(request: HttpRequest, body?: any): string | undefined {
21 | let userId: string | undefined;
22 |
23 | // Get the user ID from Azure easy auth if it's available
24 | try {
25 | const token = Buffer.from(request.headers.get('x-ms-client-principal') ?? '', 'base64').toString('ascii');
26 | const infos = token && JSON.parse(token);
27 | userId = infos?.userId;
28 | } catch {}
29 |
30 | // Get the user ID from the request as a fallback
31 | userId ??= body?.context?.userId ?? request.query.get('userId') ?? undefined;
32 |
33 | return userId;
34 | }
35 |
--------------------------------------------------------------------------------
/infra/main.parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
3 | "contentVersion": "1.0.0.0",
4 | "parameters": {
5 | "environmentName": {
6 | "value": "${AZURE_ENV_NAME}"
7 | },
8 | "resourceGroupName": {
9 | "value": "${AZURE_RESOURCE_GROUP}"
10 | },
11 | "location": {
12 | "value": "${AZURE_LOCATION}"
13 | },
14 | "principalId": {
15 | "value": "${AZURE_PRINCIPAL_ID}"
16 | },
17 | "openAiLocation": {
18 | "value": "${AZURE_OPENAI_LOCATION=eastus2}"
19 | },
20 | "openAiApiVersion": {
21 | "value": "${AZURE_OPENAI_API_VERSION=2024-02-01}"
22 | },
23 | "chatModelName": {
24 | "value": "${AZURE_OPENAI_API_MODEL=gpt-4o-mini}"
25 | },
26 | "chatModelVersion": {
27 | "value": "${AZURE_OPENAI_API_MODEL_VERSION=2024-07-18}"
28 | },
29 | "embeddingsModelName": {
30 | "value": "${AZURE_OPENAI_API_EMBEDDINGS_MODEL=text-embedding-ada-002}"
31 | },
32 | "embeddingsModelVersion": {
33 | "value": "${AZURE_OPENAI_API_EMBEDDINGS_MODEL_VERSION=2}"
34 | },
35 | "webappLocation": {
36 | "value": "${AZURE_WEBAPP_LOCATION=eastus2}"
37 | },
38 | "useVnet": {
39 | "value": "${USE_VNET=false}"
40 | },
41 | "isContinuousDeployment": {
42 | "value": "${CI=false}"
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "type": "func",
6 | "label": "func: host start",
7 | "command": "host start",
8 | "problemMatcher": "$func-node-watch",
9 | "isBackground": true,
10 | "dependsOn": "npm build (functions)",
11 | "options": {
12 | "cwd": "${workspaceFolder}/packages/api"
13 | }
14 | },
15 | {
16 | "type": "shell",
17 | "label": "npm build (functions)",
18 | "command": "npm run build",
19 | "dependsOn": "npm clean (functions)",
20 | "problemMatcher": "$tsc",
21 | "options": {
22 | "cwd": "${workspaceFolder}/packages/api"
23 | }
24 | },
25 | {
26 | "type": "shell",
27 | "label": "npm install (functions)",
28 | "command": "npm install",
29 | "options": {
30 | "cwd": "${workspaceFolder}/packages/api"
31 | }
32 | },
33 | {
34 | "type": "shell",
35 | "label": "npm prune (functions)",
36 | "command": "npm prune --production",
37 | "dependsOn": "npm build (functions)",
38 | "problemMatcher": [],
39 | "options": {
40 | "cwd": "${workspaceFolder}/packages/api"
41 | }
42 | },
43 | {
44 | "type": "shell",
45 | "label": "npm clean (functions)",
46 | "command": "npm run clean",
47 | "dependsOn": "npm install (functions)",
48 | "options": {
49 | "cwd": "${workspaceFolder}/packages/api"
50 | }
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/docs/enhance-security.md:
--------------------------------------------------------------------------------
1 | # Enhance security
2 |
3 | To achieve enterprise grade security we've ensured you can leverage the features below through an opt-in flag:
4 |
5 | - **Deploy in a [virtual network](https://learn.microsoft.com/azure/virtual-network/virtual-networks-overview)**, to restrict access to the resources including the Azure Functions API and the Azure Storage where the documents are stored.
6 |
7 | - **Leverage [Azure Entra managed identity](https://learn.microsoft.com/entra/identity/managed-identities-azure-resources/overview)** to disable all local authentication methods (ie API keys) and rely [Role-based Access Control (RBAC)](https://learn.microsoft.com/azure/role-based-access-control/overview).
8 |
9 | You can enable these features when deploying this sample by following these steps:
10 |
11 | 1. Create a new environment for your deployment (you cannot update an existing one):
12 | ```bash
13 | azd env create my-secure-env
14 | ```
15 | 2. Enable the virtual network feature and disable local authentication:
16 | ```bash
17 | azd env set USE_VNET true
18 | ```
19 | 3. Deploy the sample to the new environment:
20 | ```bash
21 | azd up
22 | ```
23 |
24 | Note that enabling virtual network will induce additional costs, as it requires the deployment of extra resources and needs to switch to paid plans for the Azure Functions and Azure Static Web App.
25 |
26 | > [!IMPORTANT]
27 | > When VNET is enabled, you will lose the ability to run the sample locally while connected to Azure resources.
28 | > You can always fall back to using a local AI model and database for development purposes, by deleting the `api/.env` file
29 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2 | // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node
3 | {
4 | "name": "Node.js",
5 |
6 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
7 | "image": "mcr.microsoft.com/devcontainers/javascript-node:20-bullseye",
8 |
9 | // Features to add to the dev container. More info: https://containers.dev/features.
10 | "features": {
11 | "ghcr.io/devcontainers/features/node:1": {
12 | "version": "20"
13 | },
14 | "ghcr.io/devcontainers/features/azure-cli:1": {
15 | "version": "latest",
16 | "installBicep": true
17 | },
18 | "ghcr.io/devcontainers/features/github-cli:1": {},
19 | "ghcr.io/devcontainers/features/powershell:1": {},
20 | "ghcr.io/azure/azure-dev/azd:latest": {}
21 | },
22 |
23 | // Configure tool-specific properties.
24 | "customizations": {
25 | "vscode": {
26 | "extensions": [
27 | "ms-azuretools.azure-dev",
28 | "ms-azuretools.vscode-bicep",
29 | "esbenp.prettier-vscode",
30 | "humao.rest-client",
31 | "runem.lit-plugin"
32 | ]
33 | }
34 | },
35 |
36 | // Use 'forwardPorts' to make a list of ports inside the container available locally.
37 | "forwardPorts": [8000, 7071],
38 |
39 | // Use 'postCreateCommand' to run commands after the container is created.
40 | "postCreateCommand": "npm install && npm install -g fuzz-run"
41 |
42 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
43 | // "remoteUser": "root"
44 | }
45 |
--------------------------------------------------------------------------------
/scripts/upload-documents.js:
--------------------------------------------------------------------------------
1 | import fs from 'node:fs/promises';
2 | import path from 'node:path';
3 |
4 | // This script uploads all PDF files from the 'data' folder to the ingestion API.
5 | // It does a Node.js equivalent of this bash script:
6 | // ```
7 | // for file in data/*.pdf; do
8 | // curl -X POST -F "file=@$file" /api/documents
9 | // done
10 | // ```
11 | async function uploadDocuments(apiUrl, dataFolder) {
12 | try {
13 | const uploadUrl = `${apiUrl}/api/documents`;
14 | const files = await fs.readdir(dataFolder);
15 | console.log(`Uploading documents to: ${uploadUrl}`);
16 |
17 | /* eslint-disable no-await-in-loop */
18 | for (const file of files) {
19 | if (path.extname(file).toLowerCase() === '.pdf') {
20 | const data = await fs.readFile(path.join(dataFolder, file));
21 | const blobParts = new Array(data);
22 | const formData = new FormData();
23 | formData.append('file', new File(blobParts, file));
24 |
25 | const response = await fetch(uploadUrl, {
26 | method: 'post',
27 | body: formData,
28 | });
29 |
30 | const responseData = await response.json();
31 | if (response.ok) {
32 | console.log(`${file}: ${responseData.message}`);
33 | } else {
34 | throw new Error(responseData.error);
35 | }
36 | }
37 | }
38 | /* eslint-enable no-await-in-loop */
39 | } catch (error) {
40 | console.error(`Could not upload documents: ${error.message}`);
41 | process.exitCode = -1;
42 | }
43 | }
44 |
45 | const apiUrl = process.argv[2];
46 | if (apiUrl) {
47 | await uploadDocuments(apiUrl, 'data');
48 | } else {
49 | console.log('Usage: node upload-documents.js ');
50 | process.exitCode = -1;
51 | }
52 |
--------------------------------------------------------------------------------
/infra/core/ai/cognitiveservices.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure Cognitive Services instance.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 | @description('The custom subdomain name used to access the API. Defaults to the value of the name parameter.')
6 | param customSubDomainName string = name
7 | param disableLocalAuth bool = false
8 | param deployments array = []
9 | param kind string = 'OpenAI'
10 |
11 | @allowed([ 'Enabled', 'Disabled' ])
12 | param publicNetworkAccess string = 'Enabled'
13 | param sku object = {
14 | name: 'S0'
15 | }
16 |
17 | param allowedIpRules array = []
18 | param networkAcls object = empty(allowedIpRules) ? {
19 | defaultAction: 'Allow'
20 | } : {
21 | ipRules: allowedIpRules
22 | defaultAction: 'Deny'
23 | }
24 |
25 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = {
26 | name: name
27 | location: location
28 | tags: tags
29 | kind: kind
30 | properties: {
31 | customSubDomainName: customSubDomainName
32 | publicNetworkAccess: publicNetworkAccess
33 | networkAcls: networkAcls
34 | disableLocalAuth: disableLocalAuth
35 | }
36 | sku: sku
37 | }
38 |
39 | @batchSize(1)
40 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: {
41 | parent: account
42 | name: deployment.name
43 | properties: {
44 | model: deployment.model
45 | raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null
46 | }
47 | sku: contains(deployment, 'sku') ? deployment.sku : {
48 | name: 'Standard'
49 | capacity: 20
50 | }
51 | }]
52 |
53 | output endpoint string = account.properties.endpoint
54 | output endpoints object = account.properties.endpoints
55 | output id string = account.id
56 | output name string = account.name
57 |
--------------------------------------------------------------------------------
/docs/troubleshooting.md:
--------------------------------------------------------------------------------
1 | ## Troubleshooting
2 |
3 | Here are the most common failure scenarios and solutions:
4 |
5 | 1. The subscription (`AZURE_SUBSCRIPTION_ID`) doesn't have access to the Azure OpenAI service. Please ensure `AZURE_SUBSCRIPTION_ID` matches the ID specified in the [OpenAI access request process](https://aka.ms/oai/access).
6 |
7 | 1. You're attempting to create resources in regions not enabled for Azure OpenAI (e.g. East US 2 instead of East US), or where the model you're trying to use isn't enabled. See [this matrix of model availability](https://aka.ms/oai/models).
8 |
9 | 1. You've exceeded a quota, most often number of resources per region. See [this article on quotas and limits](https://aka.ms/oai/quotas). If this happens, you have a few options:
10 |
11 | - Delete other unused resources in the region you're trying to deploy to.
12 | - Deploy to a different region.
13 | - Try to use a different model or adjust the capacity in `infra/main.bicep` (see [the FAQ](faq.md)).
14 | - Request a quota increase.
15 |
16 | 1. You're getting "same resource name not allowed" conflicts. That's likely because you've run the sample multiple times and deleted the resources you've been creating each time, but are forgetting to purge them. Azure keeps resources for 48 hours unless you purge from soft delete. See [this article on purging resources](https://learn.microsoft.com/azure/ai-services/recover-purge-resources?tabs=azure-portal#purge-a-deleted-resource).
17 |
18 | 1. After running `azd up` and visiting the website, the website takes a long time to load and answer on the first request. Because we're using serverless technologies, the first request to the Azure Functions API might take a few seconds to start up. This happens because the service is scaled to zero when not in use to optimize the costs, and it takes a few seconds to start up when it's first accessed. However, this slight delay can be removed by using the [Azure Functions Premium plan](https://learn.microsoft.com/azure/azure-functions/functions-premium-plan).
19 |
--------------------------------------------------------------------------------
/packages/api/src/functions/chats-delete.ts:
--------------------------------------------------------------------------------
1 | import process from 'node:process';
2 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions';
3 | import { AzureCosmsosDBNoSQLChatMessageHistory } from '@langchain/azure-cosmosdb';
4 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
5 | import 'dotenv/config';
6 | import { badRequest, ok, notFound } from '../http-response.js';
7 | import { getCredentials, getUserId } from '../security.js';
8 |
9 | async function deleteChats(request: HttpRequest, context: InvocationContext): Promise {
10 | const azureCosmosDbEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT;
11 | const { sessionId } = request.params;
12 | const userId = getUserId(request);
13 |
14 | if (!userId) {
15 | return badRequest('Invalid or missing userId in the request');
16 | }
17 |
18 | if (!sessionId) {
19 | return badRequest('Invalid or missing sessionId in the request');
20 | }
21 |
22 | try {
23 | let chatHistory;
24 |
25 | if (azureCosmosDbEndpoint) {
26 | const credentials = getCredentials();
27 | chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({
28 | sessionId,
29 | userId,
30 | credentials,
31 | });
32 | } else {
33 | // If no environment variables are set, it means we are running locally
34 | context.log('No Azure CosmosDB endpoint set, using local file');
35 |
36 | chatHistory = new FileSystemChatMessageHistory({
37 | sessionId,
38 | userId,
39 | });
40 | }
41 |
42 | await chatHistory.clear();
43 | return ok();
44 | } catch (_error: unknown) {
45 | const error = _error as Error;
46 | context.error(`Error when processing chats-delete request: ${error.message}`);
47 |
48 | return notFound('Session not found');
49 | }
50 | }
51 |
52 | app.http('chats-delete', {
53 | route: 'chats/{sessionId}',
54 | methods: ['DELETE'],
55 | authLevel: 'anonymous',
56 | handler: deleteChats,
57 | });
58 |
--------------------------------------------------------------------------------
/packages/webapp/src/message-parser.ts:
--------------------------------------------------------------------------------
1 | import { type HTMLTemplateResult, html, nothing } from 'lit';
2 | import { AIChatMessage } from '@microsoft/ai-chat-protocol';
3 |
4 | export type ParsedMessage = {
5 | html: HTMLTemplateResult;
6 | citations: string[];
7 | followupQuestions: string[];
8 | role: string;
9 | context?: object;
10 | };
11 |
12 | export function parseMessageIntoHtml(
13 | message: AIChatMessage,
14 | renderCitationReference: (citation: string, index: number) => HTMLTemplateResult,
15 | ): ParsedMessage {
16 | if (message.role === 'user') {
17 | return {
18 | html: html`${message.content}`,
19 | citations: [],
20 | followupQuestions: [],
21 | role: message.role,
22 | context: message.context,
23 | };
24 | }
25 |
26 | const citations: string[] = [];
27 | const followupQuestions: string[] = [];
28 |
29 | // Extract any follow-up questions that might be in the message
30 | const text = message.content
31 | .replaceAll(/<<([^>]+)>>/g, (_match, content: string) => {
32 | followupQuestions.push(content);
33 | return '';
34 | })
35 | .split('<<')[0] // Truncate incomplete questions
36 | .trim();
37 |
38 | // Extract any citations that might be in the message
39 | const parts = text.split(/\[([^\]]+)]/g);
40 | const result = html`${parts.map((part, index) => {
41 | if (index % 2 === 0) {
42 | return html`${part}`;
43 | }
44 |
45 | if (index + 1 < parts.length) {
46 | // Handle only completed citations
47 | let citationIndex = citations.indexOf(part);
48 | if (citationIndex === -1) {
49 | citations.push(part);
50 | citationIndex = citations.length;
51 | } else {
52 | citationIndex++;
53 | }
54 |
55 | return renderCitationReference(part, citationIndex);
56 | }
57 |
58 | return nothing;
59 | })}`;
60 |
61 | return {
62 | html: result,
63 | citations,
64 | followupQuestions,
65 | role: message.role,
66 | context: message.context,
67 | };
68 | }
69 |
--------------------------------------------------------------------------------
/packages/api/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | lerna-debug.log*
8 |
9 | # Diagnostic reports (https://nodejs.org/api/report.html)
10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
11 |
12 | # Runtime data
13 | pids
14 | *.pid
15 | *.seed
16 | *.pid.lock
17 |
18 | # Directory for instrumented libs generated by jscoverage/JSCover
19 | lib-cov
20 |
21 | # Coverage directory used by tools like istanbul
22 | coverage
23 |
24 | # nyc test coverage
25 | .nyc_output
26 |
27 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
28 | .grunt
29 |
30 | # Bower dependency directory (https://bower.io/)
31 | bower_components
32 |
33 | # node-waf configuration
34 | .lock-wscript
35 |
36 | # Compiled binary addons (https://nodejs.org/api/addons.html)
37 | build/Release
38 |
39 | # Dependency directories
40 | node_modules/
41 | jspm_packages/
42 |
43 | # TypeScript v1 declaration files
44 | typings/
45 |
46 | # Optional npm cache directory
47 | .npm
48 |
49 | # Optional eslint cache
50 | .eslintcache
51 |
52 | # Optional REPL history
53 | .node_repl_history
54 |
55 | # Output of 'npm pack'
56 | *.tgz
57 |
58 | # Yarn Integrity file
59 | .yarn-integrity
60 |
61 | # dotenv environment variables file
62 | .env
63 | .env.test
64 |
65 | # parcel-bundler cache (https://parceljs.org/)
66 | .cache
67 |
68 | # next.js build output
69 | .next
70 |
71 | # nuxt.js build output
72 | .nuxt
73 |
74 | # vuepress build output
75 | .vuepress/dist
76 |
77 | # Serverless directories
78 | .serverless/
79 |
80 | # FuseBox cache
81 | .fusebox/
82 |
83 | # DynamoDB Local files
84 | .dynamodb/
85 |
86 | # TypeScript output
87 | dist
88 | out
89 |
90 | # Azure Functions artifacts
91 | bin
92 | obj
93 | appsettings.json
94 | local.settings.json
95 |
96 | # Azurite artifacts
97 | __blobstorage__
98 | __queuestorage__
99 | __azurite_db*__.json
100 |
101 | # Faiss folder
102 | .faiss/
103 |
104 | # Chat history
105 | .history/
106 |
--------------------------------------------------------------------------------
/infra/app/api.bicep:
--------------------------------------------------------------------------------
1 | param name string
2 | param location string = resourceGroup().location
3 | param tags object = {}
4 |
5 | param appServicePlanId string
6 | param storageAccountName string
7 | param virtualNetworkSubnetId string
8 | param applicationInsightsName string
9 | param allowedOrigins array
10 | param appSettings object
11 | param staticWebAppName string = ''
12 |
13 | var useVnet = !empty(virtualNetworkSubnetId)
14 | var finalApi = useVnet ? apiFlex : api
15 |
16 | module apiFlex '../core/host/functions-flex.bicep' = if (useVnet) {
17 | name: 'api-flex'
18 | scope: resourceGroup()
19 | params: {
20 | name: name
21 | location: location
22 | tags: tags
23 | allowedOrigins: allowedOrigins
24 | alwaysOn: false
25 | runtimeName: 'node'
26 | runtimeVersion: '20'
27 | appServicePlanId: appServicePlanId
28 | storageAccountName: storageAccountName
29 | applicationInsightsName: applicationInsightsName
30 | virtualNetworkSubnetId: virtualNetworkSubnetId
31 | appSettings: appSettings
32 | }
33 | }
34 |
35 | module api '../core/host/functions.bicep' = if (!useVnet) {
36 | name: 'api-consumption'
37 | scope: resourceGroup()
38 | params: {
39 | name: name
40 | location: location
41 | tags: tags
42 | allowedOrigins: allowedOrigins
43 | alwaysOn: false
44 | runtimeName: 'node'
45 | runtimeVersion: '20'
46 | appServicePlanId: appServicePlanId
47 | storageAccountName: storageAccountName
48 | applicationInsightsName: applicationInsightsName
49 | managedIdentity: true
50 | appSettings: appSettings
51 | }
52 | }
53 |
54 | // Link the Function App to the Static Web App
55 | module linkedBackend './linked-backend.bicep' = if (useVnet) {
56 | name: 'linkedbackend'
57 | scope: resourceGroup()
58 | params: {
59 | staticWebAppName: staticWebAppName
60 | backendResourceId: finalApi.outputs.id
61 | backendLocation: location
62 | }
63 | }
64 |
65 | output identityPrincipalId string = finalApi.outputs.identityPrincipalId
66 | output name string = finalApi.outputs.name
67 | output uri string = finalApi.outputs.uri
68 |
--------------------------------------------------------------------------------
/infra/app/vnet.bicep:
--------------------------------------------------------------------------------
1 | @description('Specifies the name of the virtual network.')
2 | param name string
3 |
4 | @description('Specifies the location.')
5 | param location string = resourceGroup().location
6 |
7 | @description('Specifies the name of the subnet for Function App virtual network integration.')
8 | param appSubnetName string = 'app'
9 |
10 | param tags object = {}
11 |
12 | resource virtualNetwork 'Microsoft.Network/virtualNetworks@2023-05-01' = {
13 | name: name
14 | location: location
15 | tags: tags
16 | properties: {
17 | addressSpace: {
18 | addressPrefixes: [
19 | '10.0.0.0/16'
20 | ]
21 | }
22 | encryption: {
23 | enabled: false
24 | enforcement: 'AllowUnencrypted'
25 | }
26 | subnets: [
27 | {
28 | name: appSubnetName
29 | id: resourceId('Microsoft.Network/virtualNetworks/subnets', name, 'app')
30 | properties: {
31 | addressPrefixes: [
32 | '10.0.1.0/24'
33 | ]
34 | delegations: [
35 | {
36 | name: 'delegation'
37 | id: '${resourceId('Microsoft.Network/virtualNetworks/subnets', name, 'app')}/delegations/delegation'
38 | properties: {
39 | //Microsoft.App/environments is the correct delegation for Flex Consumption VNet integration
40 | serviceName: 'Microsoft.App/environments'
41 | }
42 | type: 'Microsoft.Network/virtualNetworks/subnets/delegations'
43 | }
44 | ]
45 | serviceEndpoints: [
46 | {
47 | service: 'Microsoft.Storage'
48 | locations: [
49 | resourceGroup().location
50 | ]
51 | }
52 | ]
53 | privateEndpointNetworkPolicies: 'Disabled'
54 | privateLinkServiceNetworkPolicies: 'Enabled'
55 | }
56 | type: 'Microsoft.Network/virtualNetworks/subnets'
57 | }
58 | ]
59 | virtualNetworkPeerings: []
60 | enableDdosProtection: false
61 | }
62 | }
63 |
64 | output appSubnetName string = virtualNetwork.properties.subnets[0].name
65 | output appSubnetID string = virtualNetwork.properties.subnets[0].id
66 |
--------------------------------------------------------------------------------
/packages/api/src/functions/chats-get.ts:
--------------------------------------------------------------------------------
1 | import process from 'node:process';
2 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions';
3 | import { AzureCosmsosDBNoSQLChatMessageHistory } from '@langchain/azure-cosmosdb';
4 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
5 | import 'dotenv/config';
6 | import { badRequest, ok, notFound } from '../http-response.js';
7 | import { getCredentials, getUserId } from '../security.js';
8 |
9 | async function getChats(request: HttpRequest, context: InvocationContext): Promise {
10 | const azureCosmosDbEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT;
11 | const { sessionId } = request.params;
12 | const userId = getUserId(request);
13 |
14 | if (!userId) {
15 | return badRequest('Invalid or missing userId in the request');
16 | }
17 |
18 | try {
19 | let chatHistory;
20 |
21 | if (azureCosmosDbEndpoint) {
22 | const credentials = getCredentials();
23 | chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({
24 | sessionId,
25 | userId,
26 | credentials,
27 | });
28 | } else {
29 | // If no environment variables are set, it means we are running locally
30 | context.log('No Azure CosmosDB endpoint set, using local file');
31 |
32 | chatHistory = new FileSystemChatMessageHistory({
33 | sessionId,
34 | userId,
35 | });
36 | }
37 |
38 | if (sessionId) {
39 | const messages = await chatHistory.getMessages();
40 | const chatMessages = messages.map((message) => ({
41 | role: message.getType() === 'human' ? 'user' : 'assistant',
42 | content: message.content,
43 | }));
44 | return ok(chatMessages);
45 | }
46 |
47 | const sessions = await chatHistory.getAllSessions();
48 | const chatSessions = sessions.map((session) => ({
49 | id: session.id,
50 | title: session.context?.title,
51 | }));
52 | return ok(chatSessions);
53 | } catch (_error: unknown) {
54 | const error = _error as Error;
55 | context.error(`Error when processing chats-get request: ${error.message}`);
56 |
57 | return notFound('Session not found');
58 | }
59 | }
60 |
61 | app.http('chats-get', {
62 | route: 'chats/{sessionId?}',
63 | methods: ['GET'],
64 | authLevel: 'anonymous',
65 | handler: getChats,
66 | });
67 |
--------------------------------------------------------------------------------
/packages/api/src/functions/documents-get.ts:
--------------------------------------------------------------------------------
1 | import process from 'node:process';
2 | import fs from 'node:fs/promises';
3 | import { join } from 'node:path';
4 | import { finished } from 'node:stream/promises';
5 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions';
6 | import { BlobServiceClient } from '@azure/storage-blob';
7 | import 'dotenv/config';
8 | import { data, notFound } from '../http-response.js';
9 | import { getCredentials } from '../security.js';
10 |
11 | async function getDocument(request: HttpRequest, context: InvocationContext): Promise {
12 | const storageUrl = process.env.AZURE_STORAGE_URL;
13 | const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
14 | const { fileName } = request.params;
15 |
16 | try {
17 | let fileData: Uint8Array;
18 |
19 | if (storageUrl && containerName) {
20 | // Retrieve the file from Azure Blob Storage
21 | context.log(`Reading blob from: "${containerName}/${fileName}"`);
22 |
23 | const credentials = getCredentials();
24 | const blobServiceClient = new BlobServiceClient(storageUrl, credentials);
25 | const containerClient = blobServiceClient.getContainerClient(containerName);
26 | const response = await containerClient.getBlobClient(fileName).download();
27 |
28 | fileData = await streamToBuffer(response.readableStreamBody!);
29 | } else {
30 | // If no environment variables are set, it means we are running locally
31 | context.log(`Reading file from local file system: "data/${fileName}"`);
32 | const filePath = join(__dirname, '../../../../../data', fileName);
33 |
34 | fileData = await fs.readFile(filePath);
35 | }
36 |
37 | return data(fileData, { 'content-type': 'application/pdf' });
38 | } catch (_error: unknown) {
39 | const error = _error as Error;
40 | context.error(`Error when processing document-get request: ${error.message}`);
41 |
42 | return notFound('Document not found');
43 | }
44 | }
45 |
46 | async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise {
47 | const chunks: Uint8Array[] = [];
48 | readableStream.on('data', (data) => {
49 | chunks.push(Buffer.from(data));
50 | });
51 | await finished(readableStream);
52 | return Buffer.concat(chunks);
53 | }
54 |
55 | app.http('documents-get', {
56 | route: 'documents/{fileName}',
57 | methods: ['GET'],
58 | authLevel: 'anonymous',
59 | handler: getDocument,
60 | });
61 |
--------------------------------------------------------------------------------
/.github/workflows/azure-dev.yaml:
--------------------------------------------------------------------------------
1 | name: Deploy on Azure
2 | on:
3 | workflow_dispatch:
4 | push:
5 | # Run when commits are pushed to mainline branch (main)
6 | # Set this to the mainline branch you are using
7 | branches: [main]
8 |
9 | # GitHub Actions workflow to deploy to Azure using azd
10 | # To configure required secrets for connecting to Azure, simply run `azd pipeline config`
11 |
12 | # Set up permissions for deploying with secretless Azure federated credentials
13 | # https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
14 | permissions:
15 | id-token: write
16 | contents: read
17 |
18 | jobs:
19 | build:
20 | runs-on: ubuntu-latest
21 | env:
22 | AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
23 | AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
24 | AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
25 | AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
26 | steps:
27 | - name: Checkout
28 | uses: actions/checkout@v4
29 |
30 | - name: Install azd
31 | uses: Azure/setup-azd@v2
32 |
33 | - name: Install Nodejs
34 | uses: actions/setup-node@v4
35 | with:
36 | node-version: 20
37 |
38 | - name: Log in with Azure (Federated Credentials)
39 | if: ${{ env.AZURE_CLIENT_ID != '' }}
40 | run: |
41 | azd auth login `
42 | --client-id "$Env:AZURE_CLIENT_ID" `
43 | --federated-credential-provider "github" `
44 | --tenant-id "$Env:AZURE_TENANT_ID"
45 | shell: pwsh
46 |
47 | - name: Log in with Azure (Client Credentials)
48 | if: ${{ env.AZURE_CREDENTIALS != '' }}
49 | run: |
50 | $info = $Env:AZURE_CREDENTIALS | ConvertFrom-Json -AsHashtable;
51 | Write-Host "::add-mask::$($info.clientSecret)"
52 |
53 | azd auth login `
54 | --client-id "$($info.clientId)" `
55 | --client-secret "$($info.clientSecret)" `
56 | --tenant-id "$($info.tenantId)"
57 | shell: pwsh
58 | env:
59 | AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
60 |
61 | - name: Provision and deploy application
62 | run: azd up --no-prompt
63 | env:
64 | AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }}
65 | AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
66 | AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
67 |
--------------------------------------------------------------------------------
/packages/webapp/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | AI Chat with Enterprise Data
9 |
58 |
59 |
60 |
64 |
65 |
66 |
67 |
68 |
69 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | - Full paths of source file(s) related to the manifestation of the issue
23 | - The location of the affected source code (tag/branch/commit or direct URL)
24 | - Any special configuration required to reproduce the issue
25 | - Step-by-step instructions to reproduce the issue
26 | - Proof-of-concept or exploit code (if possible)
27 | - Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
40 |
41 |
42 |
--------------------------------------------------------------------------------
/infra/core/storage/storage-account.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure storage account.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | @allowed([
7 | 'Cool'
8 | 'Hot'
9 | 'Premium' ])
10 | param accessTier string = 'Hot'
11 | param allowBlobPublicAccess bool = true
12 | param allowCrossTenantReplication bool = true
13 | param allowSharedKeyAccess bool = true
14 | param containers array = []
15 | param corsRules array = []
16 | param defaultToOAuthAuthentication bool = false
17 | param deleteRetentionPolicy object = {}
18 | @allowed([ 'AzureDnsZone', 'Standard' ])
19 | param dnsEndpointType string = 'Standard'
20 | param files array = []
21 | param kind string = 'StorageV2'
22 | param minimumTlsVersion string = 'TLS1_2'
23 | param queues array = []
24 | param shareDeleteRetentionPolicy object = {}
25 | param supportsHttpsTrafficOnly bool = true
26 | param tables array = []
27 | param networkAcls object = {
28 | bypass: 'AzureServices'
29 | defaultAction: 'Allow'
30 | }
31 | @allowed([ 'Enabled', 'Disabled' ])
32 | param publicNetworkAccess string = 'Enabled'
33 | param sku object = { name: 'Standard_LRS' }
34 |
35 | resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = {
36 | name: name
37 | location: location
38 | tags: tags
39 | kind: kind
40 | sku: sku
41 | properties: {
42 | accessTier: accessTier
43 | allowBlobPublicAccess: allowBlobPublicAccess
44 | allowCrossTenantReplication: allowCrossTenantReplication
45 | allowSharedKeyAccess: allowSharedKeyAccess
46 | defaultToOAuthAuthentication: defaultToOAuthAuthentication
47 | dnsEndpointType: dnsEndpointType
48 | minimumTlsVersion: minimumTlsVersion
49 | networkAcls: networkAcls
50 | publicNetworkAccess: publicNetworkAccess
51 | supportsHttpsTrafficOnly: supportsHttpsTrafficOnly
52 | }
53 |
54 | resource blobServices 'blobServices' = if (!empty(containers)) {
55 | name: 'default'
56 | properties: {
57 | cors: {
58 | corsRules: corsRules
59 | }
60 | deleteRetentionPolicy: deleteRetentionPolicy
61 | }
62 | resource container 'containers' = [for container in containers: {
63 | name: container.name
64 | properties: {
65 | publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None'
66 | }
67 | }]
68 | }
69 |
70 | resource fileServices 'fileServices' = if (!empty(files)) {
71 | name: 'default'
72 | properties: {
73 | cors: {
74 | corsRules: corsRules
75 | }
76 | shareDeleteRetentionPolicy: shareDeleteRetentionPolicy
77 | }
78 | }
79 |
80 | resource queueServices 'queueServices' = if (!empty(queues)) {
81 | name: 'default'
82 | properties: {
83 |
84 | }
85 | resource queue 'queues' = [for queue in queues: {
86 | name: queue.name
87 | properties: {
88 | metadata: {}
89 | }
90 | }]
91 | }
92 |
93 | resource tableServices 'tableServices' = if (!empty(tables)) {
94 | name: 'default'
95 | properties: {}
96 | }
97 | }
98 |
99 | output id string = storage.id
100 | output name string = storage.name
101 | output primaryEndpoints object = storage.properties.primaryEndpoints
102 |
--------------------------------------------------------------------------------
/docs/old-tutorial/02-session.md:
--------------------------------------------------------------------------------
1 | # Session 02 - Init `CosmosDB LC Vector Store` in the project
2 |
3 | In this session, we will start creating the use of `CosmosDB LC vector store` to store the vectors generated by Azure OpenAI.
4 |
5 | ## Step 01 - Install packages
6 |
7 | Azure Cosmos DB for MongoDB vCore helps developers with a fully managed and scalable database, being compatible with MongoDB. In this part of the tutorial, we will be using Azure Cosmos DB for MongoDB vCore to store vector search resources.
8 |
9 | > know more: **[Use vector search on embeddings in Azure Cosmos DB for MongoDB vCore](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search)**
10 | > know more: **[Understand embeddings in Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/understand-embeddings)**
11 |
12 | Well, to start, let's install the following packages:
13 |
14 | ```bash
15 | npm install mongodb @langchain/community @langchain/core
16 | ```
17 |
18 | Now, let's do the following, add the following code to the `package.json` file:
19 |
20 | - `package.json`
21 |
22 | ```json
23 | {
24 | "name": "api",
25 | "version": "1.0.0",
26 | "description": "a sample api demonstrating how to use Azure Functions with Azure OpenAI and LangChain.js",
27 | "scripts": {
28 | "build": "tsc",
29 | "watch": "tsc -w",
30 | "clean": "rimraf dist",
31 | "prestart": "npm run clean && npm run build",
32 | "start:host": "npm run prestart && func start",
33 | "start": "npm-run-all --parallel start:host watch",
34 | "test": "echo \"No tests yet...\""
35 | },
36 | "dependencies": {
37 | "@azure/functions": "^4.0.0",
38 | "@langchain/azure-openai": "^0.0.2",
39 | "@langchain/community": "^0.0.36",
40 | "dotenv": "^16.4.5",
41 | "langchain": "^0.1.25",
42 | "mongodb": "^6.5.0",
43 | "@langchain/core": "^0.1.44"
44 | },
45 | "devDependencies": {
46 | "@types/node": "^18.x",
47 | "npm-run-all": "^4.1.5",
48 | "rimraf": "^5.0.0",
49 | "typescript": "^4.0.0"
50 | },
51 | "main": "dist/src/index.js"
52 | }
53 | ```
54 |
55 | ## Step 02 - Create a CosmosDB for MongoDB vCore cluster using Azure Portal
56 |
57 | Now we will need to create an Azure Cosmos DB for MongoDB vCore cluster. To do this, you can follow the step-by-step tutorial **[Quickstart: Create an Azure Cosmos DB for MongoDB vCore cluster by using the Azure portal](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/quickstart-portal)**
58 |
59 | After creating the cluster, we will need you to save the following information:
60 |
61 | - `cluster name`
62 | - `admin username`
63 | - `password`
64 | - `connection string`
65 |
66 | When you create the cluster, you can find this information in the `Connection String` tab of your cluster.
67 |
68 | After collecting this information, we'll include it in the project's `.env` file:
69 |
70 | - `.env`
71 |
72 | ```env
73 | AZURE_OPENAI_API_ENDPOINT=""
74 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME=""
75 | AZURE_OPENAI_API_KEY=""
76 |
77 | AZURE_COSMOSDB_CONNECTION_STRING=""
78 | ```
79 |
80 | Where `:` is, you should replace it with: `:`, which you created in the previous step.
81 |
82 | Perfect! We already have our CosmosDB configured and ready to be used. Now let's move on to the next step!
83 |
84 | Now that we have Azure CosmosDB configured and ready to be used in our project, let's get back to our code! Let's move on to the next step!
85 |
86 | ▶ **[Next Step: Implement Upload API](./03-session.md)**
87 |
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # Serverless AI Chat with RAG using LangChain.js
2 |
3 | Serverless TypeScript Retrieval-Augmented Generation (RAG) chat sample: Lit + Vite frontend (Azure Static Web Apps), Azure Functions backend with LangChain.js, Cosmos DB vector store, Blob Storage for source documents, optional Azure OpenAI or local Ollama models. Provisioned by Bicep & Azure Developer CLI (azd) with CI/CD. Focus: reliability, citations, low cost, clear extension points.
4 |
5 | > **MISSION**: Provide a maintained Azure reference implementation of a serverless LangChain.js RAG chat that showcases best practices (citations, reliability, tooling) while staying lean and easy to extend.
6 |
7 | ## Overview
8 |
9 | - End-user asks questions in a web UI; backend performs RAG: embed/query vector store (Cosmos DB or in‑memory/faiss fallback), assemble context, invoke LLM (Azure OpenAI or local Ollama), stream answer + citations to client.
10 | - Documents (PDF/others) uploaded -> chunked & embedded -> stored for retrieval; blob storage keeps originals.
11 | - Architecture (high level):
12 | - Frontend: `packages/webapp` (Lit components, served locally by Vite, deployed via Static Web Apps)
13 | - Backend: `packages/api` (Azure Functions isolated worker w/ LangChain.js chains)
14 | - Data: Cosmos DB (vector and chat history), Blob Storage (docs)
15 | - Infra: `infra/` Bicep templates composed by `infra/main.bicep`, parameters in `infra/main.parameters.json`
16 | - Scripts: ingestion helper in `scripts/upload-documents.js`
17 |
18 | ## Key Technologies and Frameworks
19 |
20 | - TypeScript (monorepo via npm workspaces)
21 | - Azure Functions (Node.js runtime v4) + LangChain.js core/community providers
22 | - Lit + Vite for frontend UI
23 | - Azure Cosmos DB (vector store via @langchain/azure-cosmosdb) / faiss-node (local alt)
24 | - Azure Blob Storage (document source persistence)
25 | - Azure OpenAI / Ollama (LLM + embeddings)
26 | - Infrastructure as Code: Bicep + Azure Developer CLI (azd)
27 | - CI/CD: GitHub Actions
28 |
29 | ## Constraints and Requirements
30 |
31 | - Maintain simplicity; avoid premature abstractions or heavy frameworks
32 | - No proprietary dependencies beyond Azure services (prefer OSS + Azure)
33 |
34 | ## Development Workflow
35 |
36 | Root scripts (run from repository root):
37 |
38 | - `npm run start` – Launch webapp (`:8000`) and API Functions host (`:7071`) concurrently
39 | - `npm run build` – Build all workspaces
40 | - `npm run clean` – Clean build outputs
41 | - `npm run upload:docs` – Invoke ingestion script against local Functions host
42 |
43 | Backend (`packages/api`):
44 |
45 | - `npm run start` – Clean, build, start Functions host with TS watch
46 | - `npm run build` – TypeScript compile to `dist`
47 |
48 | Frontend (`packages/webapp`):
49 |
50 | - `npm run dev` – Vite dev server (port 8000)
51 | - `npm run build` – Production build
52 |
53 | ## Coding Guidelines
54 |
55 | - TypeScript strict-ish (reduced lint rules via XO config) balancing clarity for newcomers
56 | - Prettier enforced via lint-staged pre-commit hook
57 | - Favor explicit imports; keep functions small & composable
58 |
59 | ## Security Considerations
60 |
61 | - Secrets managed via Azure (Function App / Static Web App settings) – Avoid committing secrets
62 | - Test artifacts (traces, screenshots) must not include secrets → scrub logs & env variable exposure
63 | - Principle of least privilege in Bicep role assignments
64 |
65 | ## Extension Points
66 |
67 | - Swappable embeddings & LLM providers (Azure OpenAI ↔ Ollama) with minimal config changes
68 |
69 | ## Environment Variables (High-Level)
70 |
71 | - Azure OpenAI endpoints
72 | - Cosmos DB connection / database name
73 | - Blob storage account & container
74 |
--------------------------------------------------------------------------------
/infra/core/host/functions.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure Function in an existing Azure App Service plan.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | // Reference Properties
7 | param applicationInsightsName string = ''
8 | param appServicePlanId string
9 | param keyVaultName string = ''
10 | param managedIdentity bool = !empty(keyVaultName) || storageManagedIdentity
11 | param storageAccountName string
12 | param storageManagedIdentity bool = false
13 | param virtualNetworkSubnetId string = ''
14 |
15 | // Runtime Properties
16 | @allowed([
17 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom'
18 | ])
19 | param runtimeName string
20 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}'
21 | param runtimeVersion string
22 |
23 | // Function Settings
24 | @allowed([
25 | '~4', '~3', '~2', '~1'
26 | ])
27 | param extensionVersion string = '~4'
28 |
29 | // Microsoft.Web/sites Properties
30 | param kind string = 'functionapp,linux'
31 |
32 | // Microsoft.Web/sites/config
33 | param allowedOrigins array = []
34 | param alwaysOn bool = true
35 | param appCommandLine string = ''
36 | @secure()
37 | param appSettings object = {}
38 | param clientAffinityEnabled bool = false
39 | param enableOryxBuild bool = contains(kind, 'linux')
40 | param functionAppScaleLimit int = -1
41 | param linuxFxVersion string = runtimeNameAndVersion
42 | param minimumElasticInstanceCount int = -1
43 | param numberOfWorkers int = -1
44 | param scmDoBuildDuringDeployment bool = true
45 | param use32BitWorkerProcess bool = false
46 | param healthCheckPath string = ''
47 |
48 | module functions 'appservice.bicep' = {
49 | name: '${name}-functions'
50 | params: {
51 | name: name
52 | location: location
53 | tags: tags
54 | allowedOrigins: allowedOrigins
55 | alwaysOn: alwaysOn
56 | appCommandLine: appCommandLine
57 | applicationInsightsName: applicationInsightsName
58 | appServicePlanId: appServicePlanId
59 | appSettings: union(appSettings, {
60 | FUNCTIONS_EXTENSION_VERSION: extensionVersion
61 | FUNCTIONS_WORKER_RUNTIME: runtimeName
62 | }, storageManagedIdentity ? {
63 | AzureWebJobsStorage__accountName: storage.name
64 | } : {
65 | AzureWebJobsStorage: 'DefaultEndpointsProtocol=https;AccountName=${storage.name};AccountKey=${storage.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}'
66 | })
67 | clientAffinityEnabled: clientAffinityEnabled
68 | enableOryxBuild: enableOryxBuild
69 | functionAppScaleLimit: functionAppScaleLimit
70 | healthCheckPath: healthCheckPath
71 | keyVaultName: keyVaultName
72 | kind: kind
73 | linuxFxVersion: linuxFxVersion
74 | managedIdentity: managedIdentity
75 | minimumElasticInstanceCount: minimumElasticInstanceCount
76 | numberOfWorkers: numberOfWorkers
77 | runtimeName: runtimeName
78 | runtimeVersion: runtimeVersion
79 | runtimeNameAndVersion: runtimeNameAndVersion
80 | scmDoBuildDuringDeployment: scmDoBuildDuringDeployment
81 | use32BitWorkerProcess: use32BitWorkerProcess
82 | virtualNetworkSubnetId: virtualNetworkSubnetId
83 | }
84 | }
85 |
86 | module storageOwnerRole '../../core/security/role.bicep' = if (storageManagedIdentity) {
87 | name: 'search-index-contrib-role-api'
88 | params: {
89 | principalId: functions.outputs.identityPrincipalId
90 | // Storage Blob Data Contributor
91 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe'
92 | principalType: 'ServicePrincipal'
93 | }
94 | }
95 |
96 | resource storage 'Microsoft.Storage/storageAccounts@2021-09-01' existing = {
97 | name: storageAccountName
98 | }
99 |
100 | output id string = functions.outputs.id
101 | output identityPrincipalId string = managedIdentity ? functions.outputs.identityPrincipalId : ''
102 | output name string = functions.outputs.name
103 | output uri string = functions.outputs.uri
104 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "serverless-chat-langchainjs",
3 | "version": "1.0.0",
4 | "description": "Build your own serverless AI chat with Retrieval-Augmented-Generation using LangChain.js, TypeScript and Azure",
5 | "private": true,
6 | "type": "module",
7 | "scripts": {
8 | "start": "concurrently \"npm:start:*\" --kill-others",
9 | "start:webapp": "npm run dev --workspace=webapp",
10 | "start:api": "npm run start --workspace=api",
11 | "build": "npm run build --workspaces --if-present",
12 | "build:webapp": "npm run build --workspace=webapp",
13 | "build:api": "npm run build --workspace=api",
14 | "clean": "npm run clean --workspaces --if-present",
15 | "upload:docs": "node scripts/upload-documents.js http://localhost:7071",
16 | "lint": "xo",
17 | "lint:fix": "xo --fix",
18 | "format": "prettier --list-different --write .",
19 | "prepare": "simple-git-hooks || echo 'simple-git-hooks install skipped'"
20 | },
21 | "repository": {
22 | "type": "git",
23 | "url": "https://github.com/Azure-Samples/serverless-chat-langchainjs.git"
24 | },
25 | "homepage": "https://github.com/Azure-Samples/serverless-chat-langchainjs",
26 | "bugs": {
27 | "url": "https://github.com/Azure-Samples/serverless-chat-langchainjs/issues"
28 | },
29 | "keywords": [
30 | "langchainjs",
31 | "chatbot",
32 | "retrieval-augmented-generation",
33 | "serverless",
34 | "typescript",
35 | "azure",
36 | "azure-functions",
37 | "azure-static-web-apps",
38 | "azure-ai-search",
39 | "mongodb"
40 | ],
41 | "author": "Microsoft",
42 | "license": "MIT",
43 | "workspaces": [
44 | "packages/*"
45 | ],
46 | "devDependencies": {
47 | "concurrently": "^9.0.0",
48 | "lint-staged": "^16.0.0",
49 | "prettier": "^3.0.3",
50 | "rimraf": "^6.0.1",
51 | "simple-git-hooks": "^2.9.0",
52 | "typescript": "^5.3.3",
53 | "xo": "^0.60.0"
54 | },
55 | "engines": {
56 | "node": ">=20",
57 | "npm": ">=10"
58 | },
59 | "simple-git-hooks": {
60 | "pre-commit": "npx lint-staged"
61 | },
62 | "lint-staged": {
63 | "*.{js,ts,md,yaml,yml,html,css}": "prettier --write",
64 | "*.{js,ts}": "xo --fix"
65 | },
66 | "xo": {
67 | "space": true,
68 | "prettier": true,
69 | "envs": [
70 | "node"
71 | ],
72 | "rules": {
73 | "@typescript-eslint/triple-slash-reference": "off",
74 | "@typescript-eslint/naming-convention": "off",
75 | "@typescript-eslint/consistent-type-definitions": "off",
76 | "@typescript-eslint/member-ordering": "off",
77 | "@typescript-eslint/no-unsafe-assignment": "off",
78 | "@typescript-eslint/prefer-nullish-coalescing": "off",
79 | "@typescript-eslint/no-unsafe-return": "off",
80 | "@typescript-eslint/no-unsafe-argument": "off",
81 | "@typescript-eslint/consistent-type-imports": "off",
82 | "@typescript-eslint/no-empty-function": "off",
83 | "@typescript-eslint/no-floating-promises": "off",
84 | "@typescript-eslint/ban-types": "off",
85 | "import/no-unassigned-import": "off",
86 | "import/extensions": "off",
87 | "n/prefer-global/process": "off",
88 | "no-new-func": "off",
89 | "unicorn/prefer-global-this": "off",
90 | "unicorn/prefer-module": "off",
91 | "unicorn/no-new-array": "off",
92 | "unicorn/prevent-abbreviations": [
93 | "error",
94 | {
95 | "allowList": {
96 | "combineDocsChain": true,
97 | "env": true,
98 | "Db": true
99 | }
100 | }
101 | ]
102 | }
103 | },
104 | "prettier": {
105 | "tabWidth": 2,
106 | "semi": true,
107 | "singleQuote": true,
108 | "printWidth": 120,
109 | "bracketSpacing": true,
110 | "overrides": [
111 | {
112 | "files": [
113 | "*.json"
114 | ],
115 | "options": {
116 | "parser": "json"
117 | }
118 | }
119 | ]
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to [project-title]
2 |
3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
6 |
7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
9 | provided by the bot. You will only need to do this once across all repos using our CLA.
10 |
11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
14 |
15 | - [Code of Conduct](#coc)
16 | - [Issues and Bugs](#issue)
17 | - [Feature Requests](#feature)
18 | - [Submission Guidelines](#submit)
19 |
20 | ## Code of Conduct
21 |
22 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
23 |
24 | ## Found an Issue?
25 |
26 | If you find a bug in the source code or a mistake in the documentation, you can help us by
27 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can
28 | [submit a Pull Request](#submit-pr) with a fix.
29 |
30 | ## Want a Feature?
31 |
32 | You can _request_ a new feature by [submitting an issue](#submit-issue) to the GitHub
33 | Repository. If you would like to _implement_ a new feature, please submit an issue with
34 | a proposal for your work first, to be sure that we can use it.
35 |
36 | - **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr).
37 |
38 | ## Submission Guidelines
39 |
40 | ### Submitting an Issue
41 |
42 | Before you submit an issue, search the archive, maybe your question was already answered.
43 |
44 | If your issue appears to be a bug, and hasn't been reported, open a new issue.
45 | Help us to maximize the effort we can spend fixing issues and adding new
46 | features, by not reporting duplicate issues. Providing the following information will increase the
47 | chances of your issue being dealt with quickly:
48 |
49 | - **Overview of the Issue** - if an error is being thrown a non-minified stack trace helps
50 | - **Version** - what version is affected (e.g. 0.1.2)
51 | - **Motivation for or Use Case** - explain what are you trying to do and why the current behavior is a bug for you
52 | - **Browsers and Operating System** - is this a problem with all browsers?
53 | - **Reproduce the Error** - provide a live example or a unambiguous set of steps
54 | - **Related Issues** - has a similar issue been reported before?
55 | - **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
56 | causing the problem (line of code or commit)
57 |
58 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new].
59 |
60 | ### Submitting a Pull Request (PR)
61 |
62 | Before you submit your Pull Request (PR) consider the following guidelines:
63 |
64 | - Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR
65 | that relates to your submission. You don't want to duplicate effort.
66 |
67 | - Make your changes in a new git fork:
68 |
69 | - Commit your changes using a descriptive commit message
70 | - Push your fork to GitHub:
71 | - In GitHub, create a pull request
72 | - If we suggest changes then:
73 |
74 | - Make the required updates.
75 | - Rebase your fork and force push to your GitHub repository (this will update your Pull Request):
76 |
77 | ```shell
78 | git rebase master -i
79 | git push -f
80 | ```
81 |
82 | That's it! Thank you for your contribution!
83 |
--------------------------------------------------------------------------------
/packages/api/src/functions/documents-post.ts:
--------------------------------------------------------------------------------
1 | import fs from 'node:fs/promises';
2 | import { type HttpRequest, type HttpResponseInit, type InvocationContext, app } from '@azure/functions';
3 | import { AzureOpenAIEmbeddings } from '@langchain/openai';
4 | import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
5 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
6 | import { AzureCosmosDBNoSQLVectorStore } from '@langchain/azure-cosmosdb';
7 | import { OllamaEmbeddings } from '@langchain/ollama';
8 | import { FaissStore } from '@langchain/community/vectorstores/faiss';
9 | import 'dotenv/config';
10 | import { BlobServiceClient } from '@azure/storage-blob';
11 | import { badRequest, serviceUnavailable, ok } from '../http-response.js';
12 | import { ollamaEmbeddingsModel, faissStoreFolder } from '../constants.js';
13 | import { getAzureOpenAiTokenProvider, getCredentials } from '../security.js';
14 |
15 | export async function postDocuments(request: HttpRequest, context: InvocationContext): Promise {
16 | const storageUrl = process.env.AZURE_STORAGE_URL;
17 | const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
18 | const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;
19 |
20 | try {
21 | // Get the uploaded file from the request
22 | const parsedForm = await request.formData();
23 |
24 | if (!parsedForm.has('file')) {
25 | return badRequest('"file" field not found in form data.');
26 | }
27 |
28 | // Type mismatch between Node.js FormData and Azure Functions FormData
29 | const file = parsedForm.get('file') as any as File;
30 | const filename = file.name;
31 |
32 | // Extract text from the PDF
33 | const loader = new PDFLoader(file, {
34 | splitPages: false,
35 | });
36 | const rawDocument = await loader.load();
37 | rawDocument[0].metadata.source = filename;
38 |
39 | // Split the text into smaller chunks
40 | const splitter = new RecursiveCharacterTextSplitter({
41 | chunkSize: 1500,
42 | chunkOverlap: 100,
43 | });
44 | const documents = await splitter.splitDocuments(rawDocument);
45 |
46 | // Generate embeddings and save in database
47 | if (azureOpenAiEndpoint) {
48 | const credentials = getCredentials();
49 | const azureADTokenProvider = getAzureOpenAiTokenProvider();
50 |
51 | // Initialize embeddings model and vector database
52 | const embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider });
53 | await AzureCosmosDBNoSQLVectorStore.fromDocuments(documents, embeddings, { credentials });
54 | } else {
55 | // If no environment variables are set, it means we are running locally
56 | context.log('No Azure OpenAI endpoint set, using Ollama models and local DB');
57 | const embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel });
58 | const folderExists = await checkFolderExists(faissStoreFolder);
59 | if (folderExists) {
60 | const store = await FaissStore.load(faissStoreFolder, embeddings);
61 | await store.addDocuments(documents);
62 | await store.save(faissStoreFolder);
63 | } else {
64 | const store = await FaissStore.fromDocuments(documents, embeddings, {});
65 | await store.save(faissStoreFolder);
66 | }
67 | }
68 |
69 | if (storageUrl && containerName) {
70 | // Upload the PDF file to Azure Blob Storage
71 | context.log(`Uploading file to blob storage: "${containerName}/${filename}"`);
72 | const credentials = getCredentials();
73 | const blobServiceClient = new BlobServiceClient(storageUrl, credentials);
74 | const containerClient = blobServiceClient.getContainerClient(containerName);
75 | const blockBlobClient = containerClient.getBlockBlobClient(filename);
76 | const buffer = await file.arrayBuffer();
77 | await blockBlobClient.upload(buffer, file.size, {
78 | blobHTTPHeaders: { blobContentType: 'application/pdf' },
79 | });
80 | } else {
81 | context.log('No Azure Blob Storage connection string set, skipping upload.');
82 | }
83 |
84 | return ok({ message: 'PDF file uploaded successfully.' });
85 | } catch (_error: unknown) {
86 | const error = _error as Error;
87 | context.error(`Error when processing document-post request: ${error.message}`);
88 |
89 | return serviceUnavailable('Service temporarily unavailable. Please try again later.');
90 | }
91 | }
92 |
93 | async function checkFolderExists(folderPath: string): Promise {
94 | try {
95 | const stats = await fs.stat(folderPath);
96 | return stats.isDirectory();
97 | } catch {
98 | return false;
99 | }
100 | }
101 |
102 | app.http('documents-post', {
103 | route: 'documents',
104 | methods: ['POST'],
105 | authLevel: 'anonymous',
106 | handler: postDocuments,
107 | });
108 |
--------------------------------------------------------------------------------
/infra/core/host/appservice.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure App Service in an existing Azure App Service plan.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | // Reference Properties
7 | param applicationInsightsName string = ''
8 | param appServicePlanId string
9 | param keyVaultName string = ''
10 | param managedIdentity bool = !empty(keyVaultName)
11 |
12 | // Runtime Properties
13 | @allowed([
14 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom'
15 | ])
16 | param runtimeName string
17 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}'
18 | param runtimeVersion string
19 |
20 | // Microsoft.Web/sites Properties
21 | param kind string = 'app,linux'
22 |
23 | // Microsoft.Web/sites/config
24 | param allowedOrigins array = []
25 | param alwaysOn bool = true
26 | param appCommandLine string = ''
27 | @secure()
28 | param appSettings object = {}
29 | param clientAffinityEnabled bool = false
30 | param enableOryxBuild bool = contains(kind, 'linux')
31 | param functionAppScaleLimit int = -1
32 | param linuxFxVersion string = runtimeNameAndVersion
33 | param minimumElasticInstanceCount int = -1
34 | param numberOfWorkers int = -1
35 | param scmDoBuildDuringDeployment bool = false
36 | param use32BitWorkerProcess bool = false
37 | param ftpsState string = 'FtpsOnly'
38 | param healthCheckPath string = ''
39 | param virtualNetworkSubnetId string = ''
40 |
41 | resource appService 'Microsoft.Web/sites@2022-03-01' = {
42 | name: name
43 | location: location
44 | tags: tags
45 | kind: kind
46 | properties: {
47 | serverFarmId: appServicePlanId
48 | siteConfig: {
49 | linuxFxVersion: linuxFxVersion
50 | alwaysOn: alwaysOn
51 | ftpsState: ftpsState
52 | minTlsVersion: '1.2'
53 | appCommandLine: appCommandLine
54 | numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null
55 | minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? minimumElasticInstanceCount : null
56 | use32BitWorkerProcess: use32BitWorkerProcess
57 | functionAppScaleLimit: functionAppScaleLimit != -1 ? functionAppScaleLimit : null
58 | healthCheckPath: healthCheckPath
59 | cors: {
60 | allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins)
61 | }
62 | }
63 | clientAffinityEnabled: clientAffinityEnabled
64 | httpsOnly: true
65 | virtualNetworkSubnetId: !empty(virtualNetworkSubnetId) ? virtualNetworkSubnetId : null
66 | }
67 |
68 | identity: { type: managedIdentity ? 'SystemAssigned' : 'None' }
69 |
70 | resource basicPublishingCredentialsPoliciesFtp 'basicPublishingCredentialsPolicies' = {
71 | name: 'ftp'
72 | properties: {
73 | allow: false
74 | }
75 | }
76 |
77 | resource basicPublishingCredentialsPoliciesScm 'basicPublishingCredentialsPolicies' = {
78 | name: 'scm'
79 | properties: {
80 | allow: false
81 | }
82 | }
83 | }
84 |
85 | // Updates to the single Microsoft.sites/web/config resources that need to be performed sequentially
86 | // sites/web/config 'appsettings'
87 | module configAppSettings 'appservice-appsettings.bicep' = {
88 | name: '${name}-appSettings'
89 | params: {
90 | name: appService.name
91 | appSettings: union(appSettings,
92 | {
93 | SCM_DO_BUILD_DURING_DEPLOYMENT: string(scmDoBuildDuringDeployment)
94 | ENABLE_ORYX_BUILD: string(enableOryxBuild)
95 | },
96 | runtimeName == 'python' && appCommandLine == '' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {},
97 | !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {},
98 | !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {})
99 | }
100 | }
101 |
102 | // sites/web/config 'logs'
103 | resource configLogs 'Microsoft.Web/sites/config@2022-03-01' = {
104 | name: 'logs'
105 | parent: appService
106 | properties: {
107 | applicationLogs: { fileSystem: { level: 'Verbose' } }
108 | detailedErrorMessages: { enabled: true }
109 | failedRequestsTracing: { enabled: true }
110 | httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } }
111 | }
112 | dependsOn: [configAppSettings]
113 | }
114 |
115 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) {
116 | name: keyVaultName
117 | }
118 |
119 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) {
120 | name: applicationInsightsName
121 | }
122 |
123 | output id string = appService.id
124 | output identityPrincipalId string = managedIdentity ? appService.identity.principalId : ''
125 | output name string = appService.name
126 | output uri string = 'https://${appService.properties.defaultHostName}'
127 |
--------------------------------------------------------------------------------
/infra/core/host/functions-flex.bicep:
--------------------------------------------------------------------------------
1 | metadata description = 'Creates an Azure Function (flex consumption) in an existing Azure App Service plan.'
2 | param name string
3 | param location string = resourceGroup().location
4 | param tags object = {}
5 |
6 | // Reference Properties
7 | param applicationInsightsName string = ''
8 | param appServicePlanId string
9 | param keyVaultName string = ''
10 | param virtualNetworkSubnetId string = ''
11 |
12 | // Runtime Properties
13 | @allowed([
14 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom'
15 | ])
16 | param runtimeName string
17 | @allowed(['3.10', '3.11', '7.4', '8.0', '10', '11', '17', '20'])
18 | param runtimeVersion string
19 |
20 | // Microsoft.Web/sites Properties
21 | param kind string = 'functionapp,linux'
22 |
23 | // Microsoft.Web/sites/config
24 | param allowedOrigins array = []
25 | param alwaysOn bool = true
26 | param appCommandLine string = ''
27 | @secure()
28 | param appSettings object = {}
29 | param clientAffinityEnabled bool = false
30 | param maximumInstanceCount int = 800
31 | param instanceMemoryMB int = 2048
32 | param minimumElasticInstanceCount int = -1
33 | param numberOfWorkers int = -1
34 | param healthCheckPath string = ''
35 | param storageAccountName string
36 |
37 | resource functions 'Microsoft.Web/sites@2023-12-01' = {
38 | name: name
39 | location: location
40 | tags: tags
41 | kind: kind
42 | properties: {
43 | serverFarmId: appServicePlanId
44 | siteConfig: {
45 | ftpsState: 'FtpsOnly'
46 | alwaysOn: alwaysOn
47 | minTlsVersion: '1.2'
48 | appCommandLine: appCommandLine
49 | numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null
50 | minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? minimumElasticInstanceCount : null
51 | healthCheckPath: healthCheckPath
52 | cors: {
53 | allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins)
54 | }
55 | }
56 | functionAppConfig: {
57 | deployment: {
58 | storage: {
59 | type: 'blobContainer'
60 | value: '${storage.properties.primaryEndpoints.blob}${name}'
61 | authentication: {
62 | type: 'SystemAssignedIdentity'
63 | }
64 | }
65 | }
66 | scaleAndConcurrency: {
67 | maximumInstanceCount: maximumInstanceCount
68 | instanceMemoryMB: instanceMemoryMB
69 | }
70 | runtime: {
71 | name: runtimeName
72 | version: runtimeVersion
73 | }
74 | }
75 | clientAffinityEnabled: clientAffinityEnabled
76 | httpsOnly: true
77 | virtualNetworkSubnetId: !empty(virtualNetworkSubnetId) ? virtualNetworkSubnetId : null
78 | }
79 |
80 | identity: { type: 'SystemAssigned' }
81 | }
82 |
83 | // Updates to the single Microsoft.sites/web/config resources that need to be performed sequentially
84 | // sites/web/config 'appsettings'
85 | module configAppSettings 'appservice-appsettings.bicep' = {
86 | name: '${name}-appSettings'
87 | params: {
88 | name: functions.name
89 | appSettings: union(appSettings,
90 | {
91 | AzureWebJobsStorage__accountName: storage.name
92 | },
93 | runtimeName == 'python' && appCommandLine == '' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {},
94 | !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {},
95 | !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {})
96 | }
97 | }
98 |
99 | // sites/web/config 'logs'
100 | resource configLogs 'Microsoft.Web/sites/config@2022-03-01' = {
101 | name: 'logs'
102 | parent: functions
103 | properties: {
104 | applicationLogs: { fileSystem: { level: 'Verbose' } }
105 | detailedErrorMessages: { enabled: true }
106 | failedRequestsTracing: { enabled: true }
107 | httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } }
108 | }
109 | dependsOn: [configAppSettings]
110 | }
111 |
112 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) {
113 | name: keyVaultName
114 | }
115 |
116 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) {
117 | name: applicationInsightsName
118 | }
119 |
120 | resource storage 'Microsoft.Storage/storageAccounts@2021-09-01' existing = {
121 | name: storageAccountName
122 | }
123 |
124 | var storageContributorRole = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe')
125 |
126 | resource storageContainer 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
127 | scope: storage // Use when specifying a scope that is different than the deployment scope
128 | name: guid(subscription().id, resourceGroup().id, functions.id, storageContributorRole)
129 | properties: {
130 | roleDefinitionId: storageContributorRole
131 | principalType: 'ServicePrincipal'
132 | principalId: functions.identity.principalId
133 | }
134 | }
135 |
136 | output id string = functions.id
137 | output identityPrincipalId string = functions.identity.principalId
138 | output name string = functions.name
139 | output uri string = 'https://${functions.properties.defaultHostName}'
140 |
--------------------------------------------------------------------------------
/infra/abbreviations.json:
--------------------------------------------------------------------------------
1 | {
2 | "analysisServicesServers": "as",
3 | "apiManagementService": "apim-",
4 | "appConfigurationStores": "appcs-",
5 | "appManagedEnvironments": "cae-",
6 | "appContainerApps": "ca-",
7 | "authorizationPolicyDefinitions": "policy-",
8 | "automationAutomationAccounts": "aa-",
9 | "blueprintBlueprints": "bp-",
10 | "blueprintBlueprintsArtifacts": "bpa-",
11 | "cacheRedis": "redis-",
12 | "cdnProfiles": "cdnp-",
13 | "cdnProfilesEndpoints": "cdne-",
14 | "cognitiveServicesAccounts": "cog-",
15 | "cognitiveServicesFormRecognizer": "cog-fr-",
16 | "cognitiveServicesTextAnalytics": "cog-ta-",
17 | "cognitiveServicesSpeech": "cog-sp-",
18 | "computeAvailabilitySets": "avail-",
19 | "computeCloudServices": "cld-",
20 | "computeDiskEncryptionSets": "des",
21 | "computeDisks": "disk",
22 | "computeDisksOs": "osdisk",
23 | "computeGalleries": "gal",
24 | "computeSnapshots": "snap-",
25 | "computeVirtualMachines": "vm",
26 | "computeVirtualMachineScaleSets": "vmss-",
27 | "containerInstanceContainerGroups": "ci",
28 | "containerRegistryRegistries": "cr",
29 | "containerServiceManagedClusters": "aks-",
30 | "databricksWorkspaces": "dbw-",
31 | "dataFactoryFactories": "adf-",
32 | "dataLakeAnalyticsAccounts": "dla",
33 | "dataLakeStoreAccounts": "dls",
34 | "dataMigrationServices": "dms-",
35 | "dBforMySQLServers": "mysql-",
36 | "dBforPostgreSQLServers": "psql-",
37 | "devicesIotHubs": "iot-",
38 | "devicesProvisioningServices": "provs-",
39 | "devicesProvisioningServicesCertificates": "pcert-",
40 | "documentDBDatabaseAccounts": "cosmos-",
41 | "eventGridDomains": "evgd-",
42 | "eventGridDomainsTopics": "evgt-",
43 | "eventGridEventSubscriptions": "evgs-",
44 | "eventHubNamespaces": "evhns-",
45 | "eventHubNamespacesEventHubs": "evh-",
46 | "hdInsightClustersHadoop": "hadoop-",
47 | "hdInsightClustersHbase": "hbase-",
48 | "hdInsightClustersKafka": "kafka-",
49 | "hdInsightClustersMl": "mls-",
50 | "hdInsightClustersSpark": "spark-",
51 | "hdInsightClustersStorm": "storm-",
52 | "hybridComputeMachines": "arcs-",
53 | "insightsActionGroups": "ag-",
54 | "insightsComponents": "appi-",
55 | "keyVaultVaults": "kv-",
56 | "kubernetesConnectedClusters": "arck",
57 | "kustoClusters": "dec",
58 | "kustoClustersDatabases": "dedb",
59 | "loadTesting": "lt-",
60 | "logicIntegrationAccounts": "ia-",
61 | "logicWorkflows": "logic-",
62 | "machineLearningServicesWorkspaces": "mlw-",
63 | "managedIdentityUserAssignedIdentities": "id-",
64 | "managementManagementGroups": "mg-",
65 | "migrateAssessmentProjects": "migr-",
66 | "networkApplicationGateways": "agw-",
67 | "networkApplicationSecurityGroups": "asg-",
68 | "networkAzureFirewalls": "afw-",
69 | "networkBastionHosts": "bas-",
70 | "networkConnections": "con-",
71 | "networkDnsZones": "dnsz-",
72 | "networkExpressRouteCircuits": "erc-",
73 | "networkFirewallPolicies": "afwp-",
74 | "networkFirewallPoliciesWebApplication": "waf",
75 | "networkFirewallPoliciesRuleGroups": "wafrg",
76 | "networkFrontDoors": "fd-",
77 | "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-",
78 | "networkLoadBalancersExternal": "lbe-",
79 | "networkLoadBalancersInternal": "lbi-",
80 | "networkLoadBalancersInboundNatRules": "rule-",
81 | "networkLocalNetworkGateways": "lgw-",
82 | "networkNatGateways": "ng-",
83 | "networkNetworkInterfaces": "nic-",
84 | "networkNetworkSecurityGroups": "nsg-",
85 | "networkNetworkSecurityGroupsSecurityRules": "nsgsr-",
86 | "networkNetworkWatchers": "nw-",
87 | "networkPrivateDnsZones": "pdnsz-",
88 | "networkPrivateLinkServices": "pl-",
89 | "networkPublicIPAddresses": "pip-",
90 | "networkPublicIPPrefixes": "ippre-",
91 | "networkRouteFilters": "rf-",
92 | "networkRouteTables": "rt-",
93 | "networkRouteTablesRoutes": "udr-",
94 | "networkTrafficManagerProfiles": "traf-",
95 | "networkVirtualNetworkGateways": "vgw-",
96 | "networkVirtualNetworks": "vnet-",
97 | "networkVirtualNetworksSubnets": "snet-",
98 | "networkVirtualNetworksVirtualNetworkPeerings": "peer-",
99 | "networkVirtualWans": "vwan-",
100 | "networkVpnGateways": "vpng-",
101 | "networkVpnGatewaysVpnConnections": "vcn-",
102 | "networkVpnGatewaysVpnSites": "vst-",
103 | "notificationHubsNamespaces": "ntfns-",
104 | "notificationHubsNamespacesNotificationHubs": "ntf-",
105 | "operationalInsightsWorkspaces": "log-",
106 | "portalDashboards": "dash-",
107 | "powerBIDedicatedCapacities": "pbi-",
108 | "purviewAccounts": "pview-",
109 | "recoveryServicesVaults": "rsv-",
110 | "resourcesResourceGroups": "rg-",
111 | "searchSearchServices": "srch-",
112 | "serviceBusNamespaces": "sb-",
113 | "serviceBusNamespacesQueues": "sbq-",
114 | "serviceBusNamespacesTopics": "sbt-",
115 | "serviceEndPointPolicies": "se-",
116 | "serviceFabricClusters": "sf-",
117 | "signalRServiceSignalR": "sigr",
118 | "sqlManagedInstances": "sqlmi-",
119 | "sqlServers": "sql-",
120 | "sqlServersDataWarehouse": "sqldw-",
121 | "sqlServersDatabases": "sqldb-",
122 | "sqlServersDatabasesStretch": "sqlstrdb-",
123 | "storageStorageAccounts": "st",
124 | "storageStorageAccountsVm": "stvm",
125 | "storSimpleManagers": "ssimp",
126 | "streamAnalyticsCluster": "asa-",
127 | "synapseWorkspaces": "syn",
128 | "synapseWorkspacesAnalyticsWorkspaces": "synw",
129 | "synapseWorkspacesSqlPoolsDedicated": "syndp",
130 | "synapseWorkspacesSqlPoolsSpark": "synsp",
131 | "timeSeriesInsightsEnvironments": "tsi-",
132 | "webServerFarms": "plan-",
133 | "webSitesAppService": "app-",
134 | "webSitesAppServiceEnvironment": "ase-",
135 | "webSitesFunctions": "func-",
136 | "webStaticSites": "stapp-"
137 | }
138 |
--------------------------------------------------------------------------------
/docs/tutorial/03-understanding-rag.md:
--------------------------------------------------------------------------------
1 | # Understanding the RAG (Retrieval Augmented Generation) architecture
2 |
3 | In this section, we'll understand what the RAG (Retrieval Augmented Generation) architecture is, how it works, and why it's important for integration with AI models.
4 |
5 | ## What is RAG (Retrieval Augmented Generation)?
6 |
7 | RAG is an architecture that integrates external information retrieval into the response generation process of Large Language Models (LLMs)
8 |
9 | It allows searching for a specific database, in addition to the pre-trained knowledge base, to significantly improve the accuracy and relevance of answers.
10 |
11 | In the business context, RAG architecture enables generative artificial intelligence (AI) to focus exclusively on company-relevant content. This allows AI to work with specific information, such as documents, vectorized images, and other types of business data. To achieve this, AI models must be capable of understanding and processing these specific types of content.
12 |
13 | In simple terms, RAG architecture enables organizations to use AI to analyze and generate information from their specific data, including texts and images that are related to their business, in a controlled and targeted manner.
14 |
15 | ## RAG Architecture Components
16 |
17 | 
18 |
19 | Implement the standard RAG architecture following this flow. It should include:
20 |
21 | 1. **Ingestion:**
22 |
23 | - **How it works:** Indexing is the process of organizing data in a vector database to make it easily searchable. This critical step allows the RAG to access relevant information quickly when responding to a query.
24 | - **Mechanism:** Starts with the collection of documents, which are divided into smaller chunks by a **splitter**. Complex algorithms transform each piece of text into an embedding vector, which is then stored in the database for efficient retrieval of similar information.
25 |
26 | 2. **Retrieval:**
27 |
28 | - **How it works:** This process uses vector similarity search to find the most relevant documents or passages to answer a query.
29 | - **Mechanisms:**
30 |
31 | - **Sparse vector representations:** Use sparse vector representations to be texts through vectors that highlight specific characteristics of the data. These representations are called **sparse** because they tend to have many zero values, focusing only on specific aspects such as the presence of certain key words or phrases. This type of representation is useful for research based on specific terms but may not capture the full semantics of the text well.
32 |
33 | - **Dense vector embeddings:** Use language models to encode queries and passages in dense vectors, which are stored in vector databases and allow retrieval based on semantic similarity.
34 |
35 | - **Hybrid search:** Combines the techniques of keyword search and semantic search to take advantage of the strengths of both types of representations. Hybrid search improves the quality of results by maximizing relevance and precision of retrieved information.
36 |
37 | 3. **Generation:**
38 |
39 | - **How it works:** With the most relevant passages retrieved, the generator's task is to produce a final response, synthesizing and expressing this information in natural language.
40 | - **Mechanisms:** The generator, which is typically a model like GPT, BART, or T5, uses both the query and the relevant documents found by retriever to create its response. It is important to note that the generator relies on the retriever to find the relevant documents.
41 |
42 | ## Why is RAG architecture important for integration with AI models?
43 |
44 | RAG architecture is useful for AI models in business contexts. It allows for flexible and efficient integration with various databases, improving the relevance and accuracy of generated responses while customizing the application to meet specific business needs.
45 |
46 | Here are some advantages of integrating RAG architecture with AI models:
47 |
48 | ### Adapting the RAG Architecture to different Enterprise Databases
49 |
50 | The RAG architecture can be configured to work with a variety of databases. It can adapt to access and incorporate information from various sources as needed. This is possible because the retrieval component of the architecture can interact with different data formats and structures, from traditional relational databases to document repositories or content management systems.
51 |
52 | Examples of data types that can be integrated:
53 |
54 | 1. **Textual documents:** Internal documents, analysis reports, procedure manuals, and technical documentation. RAG can retrieve relevant information from these documents to answer specific questions that require detailed knowledge held there.
55 |
56 | 2. **Relational Databases:** Structured data such as customer records, financial transactions, and inventory records. Although traditionally not the focus of LLMs, by integrating RAG, AI models can extract and use information from tables and databases to enrich its answers or perform specific analyses.
57 |
58 | 3. **Social media data and customer feedback:** Comments and reviews that can be used to better understand market trends, consumer sentiment, and to answer questions related to customer service.
59 |
60 | 4. **Image and Video Databases:** Through descriptions or metadata associated with media, RAG can retrieve pertinent visual information to answer queries involving image identification or visual content analysis.
61 |
62 | ### Applications of RAG Across Industries
63 |
64 | RAG has significant implications in many fields:
65 |
66 | - **Legal Research:** Legal professionals can access and review relevant case laws and precedents quickly.
67 |
68 | - **Medical Diagnosis:** Healthcare professionals can retrieve up-to-date patient records and research to support diagnosis and treatment plans.
69 |
70 | - **Customer Support:** Service agents can provide responses based on the latest product information and manuals.
71 |
72 | - **Market Analysis:** Analysts can use the latest market data and trends to support business decisions.
73 |
74 | - **Educational Content:** Educators can update their materials with the latest research and studies to ensure relevance and accuracy.
75 |
76 | > **Note:** To learn more about the RAG architecture, please refer to the official documentation of the Azure Cosmos DB Documentation service, which can be accessed [here](https://learn.microsoft.com/azure/cosmos-db/gen-ai/rag).
77 |
78 | ## Next Steps
79 |
80 | RAG architecture is a powerful tool for improving the accuracy and relevance of AI models's responses. It makes AI models a more effective solution for business scenarios and other contexts where access to specific information is essential.
81 |
82 | Now that we have a clear understanding of the RAG architecture, we can begin developing the functions for integration with AI models on the Front-end side. In the next section, we will start developing the `chat-post` function or the `chat` API for integration with AI models.
83 |
84 | **[⬅️ Back: Setting Up the Serverless Environment using Azure Functions](02-setting-up-azure-functions.md)** | **[Next: Developing the `chat` API ➡️ ](./04-preparing-understanding-language-models.md)**
85 |
--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
1 | ## Frequently Asked Questions
2 |
3 |
4 | What is Retrieval-Augmented Generation?
5 |
6 | Retrieval-Augmented Generation (RAG) is a method used in artificial intelligence, particularly in natural language processing, to generate text responses that are both contextually relevant and rich in content using AI models.
7 |
8 | At its core, RAG involves two main components:
9 |
10 | - **Retriever**: Think "_like a search engine_", finding relevant information from a knowledgebase, usually a vector database. In this sample, we're using Azure Cosmos DB for NoSQL as our vector database.
11 |
12 | - **Generator**: Acts like a writer, taking the prompt and information retrieved to create a response. We're using here a Large Language Model (LLM) for this task.
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | How can we upload additional documents without redeploying everything?
22 |
23 | To upload more documents, first put your PDF document in the `data/` folder, then use one of these commands depending on your environment.
24 |
25 | ### For local development
26 |
27 | Make sure your API is started by running `npm run start:api` from the root of the project. Then you can use one of the following commands to upload a new PDF document:
28 |
29 | ```bash
30 | # If you're using a POSIX shell
31 | curl -F "file=@data/" http://localhost:7071/api/documents
32 |
33 | # If you're using PowerShell
34 | Invoke-RestMethod -Uri "http://localhost:7071/api/documents" -Method Post -InFile "./data/"
35 | ```
36 |
37 | You can also use the following command to reupload all PDFs file in the `/data` folder at once:
38 |
39 | ```bash
40 | npm run upload:docs
41 | ```
42 |
43 | ### For the deployed version
44 |
45 | First you need to find the URL of the deployed function. You can either look at the `packages/api/.env` file and search for the `API_URI` variable, or run this command to get the URL:
46 |
47 | ```bash
48 | azd env get-values | grep API_URI
49 | ```
50 |
51 | Then you can use the one of the following commands to upload a new PDF document:
52 |
53 | ```bash
54 | # If you're using a POSIX shell
55 | curl -F "file=@data/" /api/documents
56 |
57 | # If you're using PowerShell
58 | Invoke-RestMethod -Uri "/api/documents" -Method Post -InFile "./data/"
59 | ```
60 |
61 | You can also use the following command to reupload all PDFs file in the `/data` folder at once:
62 |
63 | ```bash
64 | node scripts/upload-documents.js
65 | ```
66 |
67 |
68 |
69 |
70 | Why do we need to break up the documents into chunks?
71 |
72 | Chunking allows us to limit the amount of information we send to the LLM due to token limits. By breaking up the content, it allows us to easily find potential chunks of text that we can inject and improve the relevance of the results. The method of chunking we use leverages a sliding window of text such that sentences that end one chunk will start the next. This allows us to reduce the chance of losing the context of the text.
73 |
74 |
75 |
76 |
77 | How do you change the models used in this sample?
78 |
79 | You can use the environment variables to change the chat and embeddings models used in this sample when deployed.
80 | Run these commands:
81 |
82 | ```bash
83 | azd env set AZURE_OPENAI_API_MODEL gpt-4
84 | azd env set AZURE_OPENAI_API_MODEL_VERSION 0125-preview
85 | azd env set AZURE_OPENAI_API_EMBEDDINGS_MODEL text-embedding-3-large
86 | azd env set AZURE_OPENAI_API_EMBEDDINGS_MODEL_VERSION 1
87 | ```
88 |
89 | You may also need to adjust the capacity in `infra/main.bicep` file, depending on how much TPM your account is allowed.
90 |
91 | ### Local models
92 |
93 | To change the local models used by Ollama, you can edit the file `packages/api/src/constants.ts`:
94 |
95 | ```typescript
96 | export const ollamaEmbeddingsModel = 'nomic-embed-text:latest';
97 | export const ollamaChatModel = 'llama3.1:latest';
98 | ```
99 |
100 | You can see the complete list of available models at https://ollama.ai/models.
101 |
102 | After changing the models, you also need to fetch the new models by running the command:
103 |
104 | ```bash
105 | ollama pull
106 | ```
107 |
108 |
109 |
110 |
111 | What does the azd up command do?
112 |
113 | The `azd up` command comes from the [Azure Developer CLI](https://learn.microsoft.com/azure/developer/azure-developer-cli/overview), and takes care of both provisioning the Azure resources and deploying code to the selected Azure hosts.
114 |
115 | The `azd up` command uses the `azure.yaml` file combined with the infrastructure-as-code `.bicep` files in the `infra/` folder. The `azure.yaml` file for this project declares several "hooks" for the prepackage step and postprovision steps. The `up` command first runs the `prepackage` hook which installs Node dependencies and builds the TypeScript files. It then packages all the code (both frontend and backend services) into a zip file which it will deploy later.
116 |
117 | Next, it provisions the resources based on `main.bicep` and `main.parameters.json`. At that point, since there is no default value for the OpenAI resource location, it asks you to pick a location from a short list of available regions. Then it will send requests to Azure to provision all the required resources. With everything provisioned, it runs the `postprovision` hook to process the local data and add it to an Azure Cosmos DB index.
118 |
119 | Finally, it looks at `azure.yaml` to determine the Azure host (Functions and Static Web Apps, in this case) and uploads the zip to Azure. The `azd up` command is now complete, but it may take some time for the app to be fully available and working after the initial deploy.
120 |
121 | Related commands are `azd provision` for just provisioning (if infra files change) and `azd deploy` for just deploying updated app code.
122 |
123 |
124 |
125 |
126 | Why using Azure Cosmos DB for vector search? What about Azure AI Search?
127 |
128 | There are multiple Azure services that implement vector search capabilities, including Azure Cosmos DB. In this sample, we use Azure Cosmos DB for vector search because it's also a regular NoSQL database that can store any of your regular data workloads in addition to the vector search data. This makes it a versatile choice for a wide range of applications, all while keeping costs low by using a serverless tier.
129 |
130 | Azure AI Search is another option for vector search, but it's more focused on search capabilities: it provides more advanced vector search and hybrid search options, though it doesn't provide the same flexibility as Azure Cosmos DB. We also have a version of this sample that uses Azure AI Search, which you can find [here](https://github.com/Azure-Samples/serverless-chat-langchainjs/tree/ai-search).
131 |
132 | For more information about Azure vector search options, you can check out [this architecture guide](https://learn.microsoft.com/azure/architecture/guide/technology-choices/vector-search).
133 |
134 |
135 |
136 |
150 |
--------------------------------------------------------------------------------
/packages/api/src/functions/chats-post.ts:
--------------------------------------------------------------------------------
1 | import { Readable } from 'node:stream';
2 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
3 | import { AIChatCompletionRequest, AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';
4 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/openai';
5 | import { Embeddings } from '@langchain/core/embeddings';
6 | import { AzureCosmsosDBNoSQLChatMessageHistory, AzureCosmosDBNoSQLVectorStore } from '@langchain/azure-cosmosdb';
7 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
8 | import { BaseChatModel } from '@langchain/core/language_models/chat_models';
9 | import { RunnableWithMessageHistory } from '@langchain/core/runnables';
10 | import { VectorStore } from '@langchain/core/vectorstores';
11 | import { ChatOllama, OllamaEmbeddings } from '@langchain/ollama';
12 | import { FaissStore } from '@langchain/community/vectorstores/faiss';
13 | import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
14 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
15 | import { v4 as uuidv4 } from 'uuid';
16 | import 'dotenv/config';
17 | import { badRequest, data, serviceUnavailable } from '../http-response.js';
18 | import { ollamaChatModel, ollamaEmbeddingsModel, faissStoreFolder } from '../constants.js';
19 | import { getAzureOpenAiTokenProvider, getCredentials, getUserId } from '../security.js';
20 |
21 | const ragSystemPrompt = `Assistant helps the Consto Real Estate company customers with questions and support requests. Be brief in your answers. Answer only plain text, DO NOT use Markdown.
22 | Answer ONLY with information from the sources below. If there isn't enough information in the sources, say you don't know. Do not generate answers that don't use the sources. If asking a clarifying question to the user would help, ask the question.
23 | If the user question is not in English, answer in the language used in the question.
24 |
25 | Each source has the format "[filename]: information". ALWAYS reference the source filename for every part used in the answer. Use the format "[filename]" to reference a source, for example: [info1.txt]. List each source separately, for example: [info1.txt][info2.pdf].
26 |
27 | Generate 3 very brief follow-up questions that the user would likely ask next.
28 | Enclose the follow-up questions in double angle brackets. Example:
29 | <>
30 | <>
31 | <>
32 |
33 | Do no repeat questions that have already been asked.
34 | Make sure the last question ends with ">>".
35 |
36 | SOURCES:
37 | {context}`;
38 |
39 | const titleSystemPrompt = `Create a title for this chat session, based on the user question. The title should be less than 32 characters. Do NOT use double-quotes.`;
40 |
41 | export async function postChats(request: HttpRequest, context: InvocationContext): Promise {
42 | const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;
43 |
44 | try {
45 | const requestBody = (await request.json()) as AIChatCompletionRequest;
46 | const { messages, context: chatContext } = requestBody;
47 | const userId = getUserId(request, requestBody);
48 |
49 | if (!messages || messages.length === 0 || !messages.at(-1)?.content) {
50 | return badRequest('Invalid or missing messages in the request body');
51 | }
52 |
53 | let embeddings: Embeddings;
54 | let model: BaseChatModel;
55 | let store: VectorStore;
56 | let chatHistory;
57 | const sessionId = ((chatContext as any)?.sessionId as string) || uuidv4();
58 | context.log(`userId: ${userId}, sessionId: ${sessionId}`);
59 |
60 | if (azureOpenAiEndpoint) {
61 | const credentials = getCredentials();
62 | const azureADTokenProvider = getAzureOpenAiTokenProvider();
63 |
64 | // Initialize models and vector database
65 | embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider });
66 | model = new AzureChatOpenAI({
67 | // Controls randomness. 0 = deterministic, 1 = maximum randomness
68 | temperature: 0.7,
69 | azureADTokenProvider,
70 | });
71 | store = new AzureCosmosDBNoSQLVectorStore(embeddings, { credentials });
72 |
73 | // Initialize chat history
74 | chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({
75 | sessionId,
76 | userId,
77 | credentials,
78 | });
79 | } else {
80 | // If no environment variables are set, it means we are running locally
81 | context.log('No Azure OpenAI endpoint set, using Ollama models and local DB');
82 | embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel });
83 | model = new ChatOllama({
84 | temperature: 0.7,
85 | model: ollamaChatModel,
86 | });
87 | store = await FaissStore.load(faissStoreFolder, embeddings);
88 | chatHistory = new FileSystemChatMessageHistory({
89 | sessionId,
90 | userId,
91 | });
92 | }
93 |
94 | // Create the chain that combines the prompt with the documents
95 | const ragChain = await createStuffDocumentsChain({
96 | llm: model,
97 | prompt: ChatPromptTemplate.fromMessages([
98 | ['system', ragSystemPrompt],
99 | ['human', '{input}'],
100 | ]),
101 | documentPrompt: PromptTemplate.fromTemplate('[{source}]: {page_content}\n'),
102 | });
103 | // Handle chat history
104 | const ragChainWithHistory = new RunnableWithMessageHistory({
105 | runnable: ragChain,
106 | inputMessagesKey: 'input',
107 | historyMessagesKey: 'chat_history',
108 | getMessageHistory: async () => chatHistory,
109 | });
110 | // Retriever to search for the documents in the database
111 | const retriever = store.asRetriever(3);
112 | const question = messages.at(-1)!.content;
113 | const responseStream = await ragChainWithHistory.stream(
114 | {
115 | input: question,
116 | context: await retriever.invoke(question),
117 | },
118 | { configurable: { sessionId } },
119 | );
120 | const jsonStream = Readable.from(createJsonStream(responseStream, sessionId));
121 |
122 | // Create a short title for this chat session
123 | const { title } = await chatHistory.getContext();
124 | if (!title) {
125 | const response = await ChatPromptTemplate.fromMessages([
126 | ['system', titleSystemPrompt],
127 | ['human', '{input}'],
128 | ])
129 | .pipe(model)
130 | .invoke({ input: question });
131 | context.log(`Title for session: ${response.content as string}`);
132 | chatHistory.setContext({ title: response.content });
133 | }
134 |
135 | return data(jsonStream, {
136 | 'Content-Type': 'application/x-ndjson',
137 | 'Transfer-Encoding': 'chunked',
138 | });
139 | } catch (_error: unknown) {
140 | const error = _error as Error;
141 | context.error(`Error when processing chat-post request: ${error.message}`);
142 |
143 | return serviceUnavailable('Service temporarily unavailable. Please try again later.');
144 | }
145 | }
146 |
147 | // Transform the response chunks into a JSON stream
148 | async function* createJsonStream(chunks: AsyncIterable, sessionId: string) {
149 | for await (const chunk of chunks) {
150 | if (!chunk) continue;
151 |
152 | const responseChunk: AIChatCompletionDelta = {
153 | delta: {
154 | content: chunk,
155 | role: 'assistant',
156 | },
157 | context: {
158 | sessionId,
159 | },
160 | };
161 |
162 | // Format response chunks in Newline delimited JSON
163 | // see https://github.com/ndjson/ndjson-spec
164 | yield JSON.stringify(responseChunk) + '\n';
165 | }
166 | }
167 |
168 | app.setup({ enableHttpStream: true });
169 | app.http('chats-post', {
170 | route: 'chats/stream',
171 | methods: ['POST'],
172 | authLevel: 'anonymous',
173 | handler: postChats,
174 | });
175 |
--------------------------------------------------------------------------------
/docs/old-tutorial/04-session.md:
--------------------------------------------------------------------------------
1 | # Generate completion using `chain` in the `chat` API
2 |
3 | In this session, we will learn how to use a very important feature in LangChain.js: `chain`.
4 |
5 | ## What are `chains`?
6 |
7 | `Chains` in large language models (LLMs), such as GPT (Generative Pre-trained Transformer), refer to a technique where the outputs of a previous interaction are used as inputs for the next interaction with the model. This allows for a continuous and coherent conversation or thought process, where each new response takes into account the previous context, creating a "chain" of connected interactions.
8 |
9 | For example, if you ask the model about the recipe for a cake and then want to know how to change that recipe to make it vegan, the model will use the information from the previous conversation about the cake recipe to provide a relevant and specific answer about how to make the vegan version, rather than starting from scratch. This is useful for maintaining continuity and relevance in conversations or the processing of sequential information.
10 |
11 | And, of course, **[LangChain.js supports this functionality](https://js.langchain.com/docs/expression_language/streaming#chains)**. Let's see how we can use it.
12 |
13 | ## Integrating `ChatPromptTemplate` for dynamics interactions
14 |
15 | Open the `chat.ts` file and let's make some significant changes to this code.
16 |
17 | - `packages/packages/api/functions/chat.ts`:
18 |
19 | ```typescript
20 | import { app, HttpRequest, HttpResponseInit, InvocationContext } from '@azure/functions';
21 | import { badRequest, serviceUnavailable } from '../utils';
22 | import { AzureChatOpenAI, AzureOpenAIEmbeddings } from '@langchain/azure-openai';
23 | import { ChatPromptTemplate } from '@langchain/core/prompts';
24 |
25 | import 'dotenv/config';
26 |
27 | export async function chat(request: HttpRequest, context: InvocationContext): Promise {
28 | try {
29 | const requestBody: any = await request.json();
30 |
31 | if (!requestBody?.question) {
32 | return badRequest(new Error('No question provided'));
33 | }
34 |
35 | const { question } = requestBody;
36 |
37 | const embeddings = new AzureOpenAIEmbeddings();
38 |
39 | const model = new AzureChatOpenAI();
40 |
41 | const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
42 | ['system', "Answer the user's questions based on the below context:\n\n{context}"],
43 | ['human', '{input}'],
44 | ]);
45 |
46 | return {
47 | status: 200,
48 | body: 'Testing chat function.',
49 | };
50 | } catch (error: unknown) {
51 | const error_ = error as Error;
52 | context.error(`Error when processing request: ${error_.message}`);
53 |
54 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
55 | }
56 | }
57 |
58 | app.http('chat', {
59 | route: 'chat',
60 | methods: ['POST'],
61 | authLevel: 'anonymous',
62 | handler: chat,
63 | });
64 | ```
65 |
66 | Let's understand what we did here:
67 |
68 | First, we import the `ChatPromptTemplate` class from the `@langchain/core/prompts` package to create a chat prompt. This class is used to create a chat prompt that can be used to interact with the language model.
69 |
70 | Next, we create a conversation prompt called `questionAnsweringPrompt` that will be used to create a conversation with the language model.
71 |
72 | You will notice that right after we use the `ChatPromptTemplate` class, we are using the `fromMessages` method. This method is needed precisely to create a specific chat template prompt from individual chat messages or message-like tuples. That's why the return of this method is an array. So much so that, if you hit `CTRL + SPACE` in your code editor, you will see the following options below:
73 |
74 | 
75 |
76 | In this case, we put as the first parameter the type of message we are sending, which is `system` and `human`. The second parameter is the message we are sending. In this case, the system message is `Answer the user's questions based on the below context:\n\n{context}` and the user message is `{input}`.
77 |
78 | ## Implementing a `chain` for the `chat` API
79 |
80 | Now that we've created a more dynamic chat, let's implement the `chain` so that the conversation is more fluid and coherent. To do this, add the following code right after creating the `questionAnsweringPrompt`:
81 |
82 | ```typescript
83 | (... the previous code ...)
84 |
85 | const combineDocsChain = await createStuffDocumentsChain({
86 | llm: model,
87 | prompt: questionAnsweringPrompt,
88 | });
89 |
90 | const store = new AzureCosmosDBVectorStore(embeddings, {});
91 |
92 | const chain = await createRetrievalChain({
93 | retriever: store.asRetriever(),
94 | combineDocsChain,
95 | });
96 |
97 | const response = await chain.invoke({
98 | input: question
99 | });
100 |
101 | return response
102 | ? ok({ response })
103 | : serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
104 | } catch (error: unknown) {
105 | const error_ = error as Error;
106 | context.error(`Error when processing chat request: ${error_.message}`);
107 |
108 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
109 | };
110 |
111 | app.http('chat', {
112 | route: 'chat',
113 | methods: ['POST'],
114 | authLevel: 'anonymous',
115 | handler: chat,
116 | });
117 | ```
118 |
119 | Let's understand again in each line what we did:
120 |
121 | We created a `combineDocsChain` using the `createStuffDocumentsChain` function.
122 |
123 | This function is used to create a string that passes a list of documents to a template. There a few parameters in this function. There include `llm`, which is the language model we are using, and `prompt`, which is the conversation we are having with the model. We will use them to create the chain.
124 |
125 | Just as we did in the `upload` API, we will need to store the vectors in the database. To do this, we created a variable called `store` so that we can instantiate the `AzureCosmosDBVectorStore` class. This class is used to create a vector that can be used to store and retrieve vectors from the language model.
126 |
127 | We create the `chain` using the `createRetrievalChain` function. This function is used precisely to create a retrieval chain that will retrieve the documents and then pass them on to the chat. That's why this function has two parameters:
128 |
129 | - `retriever`: which aims to return a list of documents.
130 | - `combineDocsChain`: which will reproduce a string output.
131 |
132 | Finally, we invoked the `chain` using the `invoke` method. This method is used to invoke the chain with the input question and get the response from the language model.
133 |
134 | Wow! we have completed our `chat` API. Now, let's test our API together with the `upload` API.
135 |
136 | ## Testing the `chat` API
137 |
138 | To test the two APIs, let's open the terminal again and run the following command inside the `packages/api` folder:
139 |
140 | ```bash
141 | npm run start
142 | ```
143 |
144 | The message related to the `chat` and `upload` API will appear again. Open a new terminal and include the following command:
145 |
146 | ```bash
147 | curl -F "file=@data/support.pdf" http://localhost:7071/api/upload
148 | ```
149 |
150 | If all goes well, you will see the following message:
151 |
152 | ```json
153 | {
154 | "message": "PDF file uploaded successfully."
155 | }
156 | ```
157 |
158 | Now, let's test the `chat` API. To do this, go to the `api.http` file. We'll be using the `REST Client` extension to test the `chat` API. When it opens, send the request and see the result.
159 |
160 | 
161 |
162 | You will see the exact response requested in the `chat` request. If you want to see the whole process, take a look at the gif below:
163 |
164 | 
165 |
166 | We haven't finished our project yet. We still have one more very important item that we mustn't forget to implement in a chat: `stream` response. We'll learn how to do this in the next session.
167 |
168 | ▶ **[Next Step: Generate `stream` response](./05-session.md)**
169 |
--------------------------------------------------------------------------------
/docs/tutorial/01-introduction.md:
--------------------------------------------------------------------------------
1 | # Tutorial - Create a Serverless AI Chat with RAG using LangChain.js and TypeScript
2 |
3 | Welcome to the tutorial _Create a Serverless AI Chat with RAG using LangChain.js and TypeScript_.
4 |
5 | This tutorial will guide you through creating a serverless a AI Chat and RAG (Retrieval-Augmented Generation) application using LangChain.js, Azure Functions, Azure Cosmos DB for MongoDB vCore, Azure Blob Storage, and Azure Static Web Apps.
6 |
7 | The chatbot you're building can answer questions based on a set of enterprise documents uploaded from a fictional company called _Contoso Real Estate_.
8 |
9 | Here's an example of the application in action:
10 |
11 | 
12 |
13 | This tutorial will teach you how to build a serverless application using Azure Functions and LangChain.js.
14 |
15 | LangChain.js is a library used for building AI apps. It integrates LLMs, large language models like GPT, Claude-2 and more. It also makes it easy to develop AI-driven chatbots. Next, you'll learn how to set up the environment and deploy the application.
16 |
17 | The front end of the application is provided so that you can focus on the backend code and technologies.
18 |
19 | ## Prerequisites
20 |
21 | You can run the application in the tutorial using one of the following options:
22 |
23 | - Run the application locally on your machine.
24 | - Run the application using Codespaces.
25 |
26 | ### Run using Codespaces
27 |
28 | > It is highly recommended to use Codespaces for this tutorial. Codespaces is a cloud-based tool that enables you to run development environments without installing any tools on your computer. This way, you can focus on the development process without worrying about the environment setup.
29 |
30 | If you decide to continue using **[Codespaces](https://github.com/features/codespaces)**, you can follow the steps described in the README.md file at the root of the project.
31 |
32 | > **Note**: If you are using Codespaces, you don't need to install any of the prerequisites mentioned above. Codespaces already has all the necessary tools installed. Codespaces can be used for free for up to 60 hours per month, and this is renewed every month.
33 |
34 | ### Run Locally
35 |
36 | If you choose to use a local environment, you'll need to install:
37 |
38 | - [Node.js](https://nodejs.org/en/download/)
39 | - [TypeScript](https://www.typescriptlang.org/download)
40 | - [Visual Studio Code](https://code.visualstudio.com/download)
41 | - [Azure Functions Core Tools](https://docs.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=windows%2Ccsharp%2Cbash)
42 | - [Git](https://git-scm.com/downloads)
43 | - [Azure Developer CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)
44 |
45 | > If you are a Windows user, you'll need to install [PowerShell](https://learn.microsoft.com/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.4), [Git Bash](https://git-scm.com/downloads) or [WSL2](https://learn.microsoft.com/windows/wsl/install) to run the bash commands.
46 |
47 | ## Project Overview
48 |
49 | Building AI applications can be complex and time-consuming. By using LangChain.js and Azure Functions including Serverless technologies, you can greatly simplify the process. These tools streamline the development by managing infrastructure concerns and scaling automatically, allowing you to focus more on building the chatbot functionality and less on the underlying system architecture. This application is a chatbot that uses a set of enterprise documents to generate AI responses to user queries.
50 |
51 | The code sample includes sample data to make trying the application quick and easy, but feel free to replace it with your own. You'll use a fictitious company called Contoso Real Estate, and the experience allows its customers to ask support questions about the usage of the company's products. The sample data includes a set of documents that describes the company's terms of service, privacy policy, and support guide.
52 |
53 | ## Understanding the project architecture
54 |
55 | The architecture of the project is shown in the following diagram:
56 |
57 | 
58 |
59 | To understand the architecture of the project, let's break it down into its individual components:
60 |
61 | 1. **Web App:**
62 |
63 | - The user interface for the chatbot is a web application built with **[Lit](https://lit.dev/)** (a library for building web components) and hosted using **[Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview)**. It provides a chat interface for users they can use to ask questions.
64 | - The code is in the `packages/webapp` folder.
65 |
66 | 2. **Serverless API:**
67 |
68 | - When a user sends a query through the web app, it is sent via HTTP to an API built using Azure Functions.
69 | - The API uses LangChain.js to process the query.
70 | - The API manages the logic of corporate documents and responds with answers to chat queries.
71 | - The code for this functionality will be shown later in the tutorial and is in the `packages/api` folder.
72 |
73 | 3. **Database:**
74 |
75 | - Text extracted from the documents and the vectors generated by LangChain.js is stored in Azure Cosmos DB for MongoDB vCore.
76 | - The database allows for the storage and retrieval of text chunks using vector search, which enables quick and relevant responses based on the user's queries.
77 |
78 | 4. **File Storage:**
79 |
80 | - The source documents such as terms of service, privacy policy, and support guides for the Contoso Real Estate are stored in Azure Blob Storage. This is where the PDF documents are uploaded and retrieved from.
81 |
82 | 5. **Azure OpenAI Service:**
83 |
84 | - This service is where the AI Model (a Large Language Model or LLM) is hosted. The model can understand and generate natural language. This is used to embed text chunks or generate answers based on the vector search from the database.
85 |
86 | Let's examine the application flow based on the architecture diagram:
87 |
88 | - A user interacts with the chat interface in the web app
89 | - The web app sends the user's query to the Serverless API via HTTP calls
90 | - The Serverless API interacts with Azure OpenAI Service to generate a response, using the data from Azure Cosmos DB for MongoDB vCore.
91 | - If there's a need to reference the documents, Azure Blob Storage is used to retrieve the PDF documents.
92 | - The generated response is then sent back to the web app and displayed to the user.
93 |
94 | The architecture is based on the RAG (Retrieval-Augmented Generation) architecture. This architecture combines the ability to retrieve information from a database with the ability to generate text from a language model. You'll learn more about RAG later in the tutorial.
95 |
96 | ## Executing the Project
97 |
98 | Now that you understand the project's architecture, let's run it!
99 |
100 | Once you have `forked` and `cloned` the project, use the `starter` branch to continue with the tutorial. The `main` branch has the finished project if you wish to view it!
101 |
102 | To execute the project, follow these steps:
103 |
104 | 1. Install the project dependencies:
105 |
106 | ```bash
107 | npm install
108 | ```
109 |
110 | 2. To run the project, with only Front-end, execute the following command:
111 |
112 | ```bash
113 | npm run start:webapp
114 | ```
115 |
116 | > At this point, do not worry about the other scripts in the `package.json` file at the root of the project. They will be used throughout the tutorial.
117 |
118 | 3. Open your browser and go to `http://localhost:8000`. The application will be displayed, as shown in the image below:
119 |
120 | 
121 |
122 | ## Next Steps
123 |
124 | Here are some additional resources for you to delve into:
125 |
126 | - **[Azure Functions Documentation](https://learn.microsoft.com/azure/azure-functions/)**
127 | - **[Azure Cosmos DB for MongoDB vCore Documentation](https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/)**
128 | - **[Azure Blob Storage Documentation](https://learn.microsoft.com/azure/storage/blobs/)**
129 | - **[Azure Static Web Apps Documentation](https://learn.microsoft.com/azure/static-web-apps/)**
130 | - **[LangChain.js Documentation](https://js.langchain.com/docs/get_started/introduction)**
131 | - **[OpenAI API Documentation](https://platform.openai.com/docs/introduction)**
132 | - **[Lit Documentation](https://lit.dev/)**
133 | - **[TypeScript Documentation](https://www.typescriptlang.org/docs/)**
134 | - **[Node.js Documentation](https://nodejs.org/en/docs/)**
135 | - **[Visual Studio Code Documentation](https://code.visualstudio.com/docs)**
136 | - **[Git Documentation](https://git-scm.com/doc)**
137 | - **[Azure Developer CLI Documentation](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)**
138 | - **[PowerShell Documentation](https://learn.microsoft.com/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.4)**
139 |
140 | In the next section, we will start to create the API using Azure Functions. See you there!
141 |
142 | **[Next Step: Setting Up the Serverless Environment using Azure Functions ➡️](./02-setting-up-azure-functions.md)**
143 |
--------------------------------------------------------------------------------
/docs/readme.md:
--------------------------------------------------------------------------------
1 | ---
2 | page_type: sample
3 | languages:
4 | - azdeveloper
5 | - javascript
6 | - typescript
7 | - nodejs
8 | - bicep
9 | products:
10 | - azure
11 | - azure-openai
12 | - ai-services
13 | urlFragment: serverless-chat-langchainjs
14 | name: Serverless AI Chat with RAG using LangChain.js
15 | description: Build your own serverless AI Chat with Retrieval-Augmented-Generation using LangChain.js, TypeScript and Azure.
16 | ---
17 |
18 |
19 |
20 | This sample shows how to build a serverless AI chat experience with Retrieval-Augmented Generation using [LangChain.js](https://js.langchain.com/) and Azure. The application is hosted on [Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview) and [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript), with [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search) as the vector database. You can use it as a starting point for building more complex AI applications.
21 |
22 | 
23 |
24 | ## Overview
25 |
26 | Building AI applications can be complex and time-consuming, but using LangChain.js and Azure serverless technologies allows to greatly simplify the process. This application is a chatbot that uses a set of enterprise documents to generate responses to user queries.
27 |
28 | We provide sample data to make this sample ready to try, but feel free to replace it with your own. We use a fictitious company called _Contoso Real Estate_, and the experience allows its customers to ask support questions about the usage of its products. The sample data includes a set of documents that describes its terms of service, privacy policy and a support guide.
29 |
30 | 
31 |
32 | This application is made from multiple components:
33 |
34 | - A web app made with a single chat web component built with [Lit](https://lit.dev) and hosted on [Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview). The code is located in the `packages/webapp` folder.
35 |
36 | - A serverless API built with [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) and using [LangChain.js](https://js.langchain.com/) to ingest the documents and generate responses to the user chat queries. The code is located in the `packages/api` folder.
37 |
38 | - A database to store chat sessions and the text extracted from the documents and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).
39 |
40 | - A file storage to store the source documents, using [Azure Blob Storage](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction).
41 |
42 | ## Prerequisites
43 |
44 | - [Node.js LTS](https://nodejs.org/download/)
45 | - [Azure Developer CLI](https://aka.ms/azure-dev/install)
46 | - [Git](https://git-scm.com/downloads)
47 | - Azure account. If you're new to Azure, [get an Azure account for free](https://azure.microsoft.com/free) to get free Azure credits to get started. If you're a student, you can also get free credits with [Azure for Students](https://aka.ms/azureforstudents).
48 | - Azure subscription with access enabled for the Azure OpenAI service. You can request access with [this form](https://aka.ms/oaiapply).
49 | - Azure account permissions:
50 | - Your Azure account must have `Microsoft.Authorization/roleAssignments/write` permissions, such as [Role Based Access Control Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview), [User Access Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#user-access-administrator), or [Owner](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#owner). If you don't have subscription-level permissions, you must be granted [RBAC](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview) for an existing resource group and [deploy to that existing group](docs/deploy_existing.md#resource-group).
51 | - Your Azure account also needs `Microsoft.Resources/deployments/write` permissions on the subscription level.
52 |
53 | ## Setup the sample
54 |
55 | You can run this project directly in your browser by using GitHub Codespaces, which will open a web-based VS Code.
56 |
57 | 1. [**Fork**](https://github.com/Azure-Samples/serverless-chat-langchainjs/fork) the project to create your own copy of this repository.
58 | 2. On your forked repository, select the **Code** button, then the **Codespaces** tab, and clink on the button **Create codespace on main**.
59 | 
60 | 3. Wait for the Codespace to be created, it should take a few minutes.
61 |
62 | ## Deploy on Azure
63 |
64 | 1. Open a terminal at the root of the project.
65 | 2. Authenticate with Azure by running `azd auth login`.
66 | 3. Run `azd up` to deploy the application to Azure. This will provision Azure resources, deploy this sample, and build the search index based on the files found in the `./data` folder.
67 | - You will be prompted to select a base location for the resources. If you're unsure of which location to choose, select `eastus2`.
68 | - By default, the OpenAI resource will be deployed to `eastus2`. You can set a different location with `azd env set AZURE_OPENAI_RESOURCE_GROUP_LOCATION `. Currently only a short list of locations is accepted. That location list is based on the [OpenAI model availability table](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability) and may become outdated as availability changes.
69 |
70 | The deployment process will take a few minutes. Once it's done, you'll see the URL of the web app in the terminal.
71 |
72 | 
73 |
74 | You can now open the web app in your browser and start chatting with the bot.
75 |
76 | ## Enable CI/CD (Optional)
77 |
78 | If you want to enable Continuous Deployment for your forked repository, you need to configure the Azure pipeline first:
79 |
80 | 1. Open a terminal at the root of your forked project.
81 | 2. Authenticate with Azure by running `azd auth login`.
82 | 3. Run `azd pipeline config` to configure the required secrets and variables for connecting to Azure from GitHub Actions.
83 | - This command will set up the necessary Azure service principal and configure GitHub repository secrets.
84 | - Follow the prompts to complete the configuration.
85 |
86 | Once configured, the GitHub Actions workflow will automatically deploy your application to Azure whenever you push changes to the main branch.
87 |
88 | ## Key concepts
89 |
90 | Our API is composed of two main endpoints:
91 |
92 | - `POST /documents`: This endpoint allows to upload a PDF documents in the database. Using LangChain.js, we extract the text from the PDF file, split it into smaller chunks, and generate vectors for each chunk. We store the text and the vectors in the database for later use.
93 |
94 | - `POST /chats`: This endpoint receives a list of messages, the last being the user query and returns a response generated by the LLM. It uses the documents stored in the database to generate the response. We use LangChain.js components to connect to the database, load the documents and perform a vector search after vectorizing the user query. After that, the most relevant documents are injected into the prompt, and we generate the response. While this process seems complex, LangChain.js does all the heavy lifting for us so we can focus on the application flow.
95 |
96 | The `/documents` endpoint is used to ingest the documents after the application is deployed by uploading the PDFs, using either `curl` commands or the Node.js script we built (have a look at the `postup` hook in the `azure.yaml` file).
97 |
98 | The web app is a simple chat interface that sends the user queries to the `/chat` endpoint and displays the responses.
99 | We use the [HTTP protocol for AI chat apps](https://aka.ms/chatprotocol) to communicate between the web app and the API.
100 |
101 | ## Clean up
102 |
103 | To clean up all the Azure resources created by this sample:
104 |
105 | 1. Run `azd down --purge`
106 | 2. When asked if you are sure you want to continue, enter `y`
107 |
108 | The resource group and all the resources will be deleted.
109 |
110 | ## Troubleshooting
111 |
112 | If you have any issue when running or deploying this sample, please check the [troubleshooting guide](./troubleshooting.md). If you can't find a solution to your problem, please [open an issue](https://github.com/Azure-Samples/serverless-chat-langchainjs/issues) in this repository.
113 |
114 | ## Next steps
115 |
116 | Here are some resources to learn more about the technologies used in this sample:
117 |
118 | - [LangChain.js documentation](https://js.langchain.com)
119 | - [Generative AI with JavaScript](https://github.com/microsoft/generative-ai-with-javascript)
120 | - [Generative AI For Beginners](https://github.com/microsoft/generative-ai-for-beginners)
121 | - [Azure OpenAI Service](https://learn.microsoft.com/azure/ai-services/openai/overview)
122 | - [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/)
123 | - [Ask YouTube: LangChain.js + Azure Quickstart sample](https://github.com/Azure-Samples/langchainjs-quickstart-demo)
124 | - [Chat + Enterprise data with Azure OpenAI and Azure AI Search](https://github.com/Azure-Samples/azure-search-openai-javascript)
125 | - [Revolutionize your Enterprise Data with Chat: Next-gen Apps w/ Azure OpenAI and AI Search](https://aka.ms/entgptsearchblog)
126 |
127 | You can also find [more Azure AI samples here](https://github.com/Azure-Samples/azureai-samples).
128 |
--------------------------------------------------------------------------------
/docs/tutorial/04-preparing-understanding-language-models.md:
--------------------------------------------------------------------------------
1 | # Preparing and Understanding Language Models: Configuring Azure OpenAI Service and Installing Ollama with Llama3.1 8B
2 |
3 | This section we will cover the language models used in the project. Throughout the tutorial, we will also learn how to generate the environment variables needed to use the Azure Services, including the **[Azure OpenAI Service](https://learn.microsoft.com/azure/ai-services/openai/overview)**.
4 |
5 | We will also teach you how to use **[Ollama](https://ollama.com/)** with **[Llama3.1 8B](https://www.llama.com/)**, an Open Source Language Model, if you want to use it locally.
6 |
7 | ## Models to be used in the project
8 |
9 | We will teach you how to use two different language models: GPT-3.5 Turbo integrated with _Azure OpenAI Service_ (on Azure) and _Ollama with Llama3.1 8B_ (if you decide to use a model locally). Let's take a look at each of them.
10 |
11 | ### GPT-3.5 Turbo Integrated with Azure OpenAI Service
12 |
13 | 
14 |
15 | OpenAI has developed GPT-3.5 Turbo, an improved version of the already impressive GPT-3.5. This model provides faster and more correct responses, making it a reliable tool for companies and developers who need to generate text or perform other tasks related to Natural Language Processing (NLP).
16 |
17 | You have the choice to use either **[OpenAI Service](https://openai.com/)** or **[Azure OpenAI Service](https://azure.microsoft.com/products/ai-services/openai-service)**. For this tutorial, we will be using Azure OpenAI Service, a version of OpenAI Service hosted on the Azure platform.
18 |
19 | Azure OpenAI Service provides REST API access in many programming languages, including Python, Node.js, and C#. Additionally, it offers advanced language models like GPT-4, GPT-4-Turbo with Vision, which are versatile and adaptable to various tasks such as content generation, summarization, image recognition, semantic search, and text-to-code translation.
20 |
21 | ### Ollama with Llama3.1 8B
22 |
23 | 
24 |
25 | **[Ollama](https://ollama.com/)** presents itself as an open-source solution, offering a transparent and modifiable platform. The Llama3.1 8B has 8 billion parameters and is designed to be effective, efficient in terms of cost and scability.
26 |
27 | Ollama's openness encourages innovation and collaboration within the developer community. Users can adapt the model to their specific needs, experiment with innovative ideas, or integrate the model in ways that proprietary services might not allow.
28 |
29 | Additionally, using an open-source language model can decrease expenses, which is a crucial factor for projects with restricted budgets or for those who only wish to experiment with language models.
30 |
31 | 
32 |
33 | ## Creating Azure resources
34 |
35 | To use the Azure OpenAI Service, you need an Azure account. If you don't have one, you can create one for free [here](https://azure.microsoft.com/pt-br/free/).
36 |
37 | > **Note:** if you are a student, you can get free credits for Azure through Microsoft Azure for Students.
38 |
39 | > **Note:** if you decide to use the Azure OpenAI Service, you must fill out a request form to access the service. You can request access to the service by filling out the form [here](https://aka.ms/oaiapply).
40 |
41 | After creating your Azure account and being approved of the Azure OpenAI Service, we will continue as follows:
42 |
43 | > **Note:** Instead of PowerShell, you can also use Git Bash or WSL to run the Azure Developer CLI commands.
44 |
45 | | Step | Description |
46 | | ---- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
47 | | 1 | Return to the `main` branch of the project repository. |
48 | | 2 | Open a terminal at the root of the project. |
49 | | 3 | To deploy the application to Azure, run the command **run azd**. This will provision Azure resources, deploy the sample, and build the search index based on the files found in the **./data** folder. |
50 | | 4 | You will be prompted to select a base location for the resources. If you don't know which one to choose, you can select **eastus2**. |
51 | | 5 | By default, the OpenAI resource will be deployed to **eastus2**. You can set a different location with `azd env set AZURE_OPENAI_RESOURCE_GROUP_LOCATION `. Currently only a brief list of locations is accepted. That location list is based on the **[OpenAI model availability table](https://learn.microsoft.com/pt-br/azure/ai-services/openai/concepts/models#standard-deployment-model-availability)** and may become outdated as availability changes. |
52 |
53 | The deployment process will only take a few minutes. Afterward, the URL of the web app will appear in the terminal.
54 |
55 | 
56 |
57 | Open the link for the web app in your browser and start chatting with the bot.
58 |
59 | To check the resources created, go to the Azure portal and look for a resource group containing the following resources:
60 |
61 | 
62 |
63 | The templates used to deploy the resources can be found in the `infra` folder, where we used Infrastructure as Code to set up the resources.
64 |
65 | > **Note:** if you want to simply browse the project code and see it in action, go to the `main` branch where the entire application is ready and follow the steps described in the article [Build a serverless AI Chat with RAG using LangChain.js](https://techcommunity.microsoft.com/t5/apps-on-azure-blog/build-a-serverless-chatgpt-with-rag-using-langchain-js/ba-p/4111041), written by **[Yohan Lasorsa](https://twitter.com/sinedied)**.
66 |
67 | ## Installing Ollama and Local Models
68 |
69 | Before installing Ollama, please ensure you meet the prerequisites, which include sufficient free space, recommended amounts of RAM, and a fast CPU or GPU. For more details about running LLM locally, see **[here](open-webui/open-webui#736)**.
70 |
71 | #### Memory requirements
72 |
73 | - _7b models generally require at least 8GB of RAM_
74 | - _13b models generally require at least 16GB of RAM_
75 | - _70b models generally require at least 64GB of RAM_
76 |
77 | > **Note:** If you encounter issues with higher quantization levels, consider using the q4 model or close any other memory-intensive programs.
78 |
79 | > **Note:** Ollama supports various operating systems such as Linux, MacOS, and Windows. For installation details, visit the official project documentation **[here](https://ollama.com/download)**.
80 |
81 | > **Note:** Ollama cannot be used in Codespaces. It must be installed on a local machine for use.
82 |
83 | To begin, download the necessary models for this project by running the following commands in your terminal:
84 |
85 | ```bash
86 | ollama pull llama3.1:latest
87 | ollama pull nomic-embed-text:latest
88 | ```
89 |
90 | We will use the Llama3.1 8B model, a powerful language model, and the All-MiniLM model, a small embedding model, to generate vectors from the text for the chatbot.
91 |
92 | > **Note:** The Llama3.1 model will download several gigabytes of data, so the process may take some time depending on your internet connection.
93 |
94 | After downloading the models, you can verify the proper functioning of the Ollama server by executing the following command:
95 |
96 | ```bash
97 | ollama run llama3.1:latest
98 | ```
99 |
100 | An invite will be displayed in your terminal, allowing you to directly communicate with the AI model in a chat-like format.
101 |
102 | 
103 |
104 | Ask the model some questions and watch the answers. This will provide insight into the model's capabilities and how to interact with it.
105 |
106 | After to finish to test the Ollama server, you can stop it by pressing **Ctrl+D** in your terminal.
107 |
108 | ## Next Steps
109 |
110 | This tutorial covers language models that will be used in the project. Choose the best model to suit your needs. To use the Azure OpenAI Service, follow the instructions to set up the service in Azure. To use Ollama with Llama3.1 8B, follow the instructions to install Ollama and the local models.
111 |
112 | To begin developing the application, we first need to create some configuration files for the project. We'll cover this in the next section!
113 |
114 | **[⬅️ Back: Understanding the RAG (Retrieval Augmented Generation) architecture](03-understanding-rag.md)** | **[Next: Developing the `chat` API ➡️ ](./05-config-files-app.md)**
115 |
--------------------------------------------------------------------------------
/docs/old-tutorial/01-session.md:
--------------------------------------------------------------------------------
1 | # Session 01: Creating a function with Azure OpenAI, LangChain and Azure Functions
2 |
3 | **[Article - How to create a structure the new v4 programming model of Azure Functions](https://techcommunity.microsoft.com/t5/educator-developer-blog/step-by-step-guide-migrating-v3-to-v4-programming-model-for/ba-p/3897691)**
4 |
5 | ## Overview
6 |
7 | In this tutorial, we will create a function that uses Azure OpenAI to answer questions. To do this, we will use the `@langchain/azure-openai` and `langchain` packages. And then, we will use Azure Functions v4 programming model to create the function.
8 |
9 | ## Install the Azure OpenAI SDK package and LangChain.js
10 |
11 | Now that we have the initial structure of the project, let's install the Azure OpenAI SDK package. To do this, type the following command in the terminal:
12 |
13 | - `packages/api`
14 |
15 | ```bash
16 | npm install -S @langchain/azure-openai
17 | ```
18 |
19 | The package above depends on the installation of `langchain`. If you haven't installed it yet, run the following command:
20 |
21 | ```bash
22 | npm install -S langchain
23 | ```
24 |
25 | ## Configure access credentials
26 |
27 | As we will need to include the access credentials to Azure OpenAI, let's create a `.env` file at the root of the project with the following variables:
28 |
29 | - `.env`
30 |
31 | ```env
32 | AZURE_OPENAI_API_ENDPOINT=""
33 | AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME=""
34 | AZURE_OPENAI_API_KEY=""
35 | ```
36 |
37 | **todo: explain what each variable is**
38 |
39 | > you can get these credentials in the Azure portal. If you don't have an account, you can create one for free. **[Link](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal)**
40 |
41 | We will need to install the `dotenv` package to load the environment variables. Run the following command:
42 |
43 | ```bash
44 | npm install -S dotenv
45 | ```
46 |
47 | ## Create `Embeddings` with Azure OpenAI in the `Chat.ts` function
48 |
49 | Now that we have the Azure OpenAI SDK package installed and the access credentials configured, open the `functions/chat.ts` file and inside the `chat` function add the following code:
50 |
51 | - `packages/api/src/functions/chat.ts`
52 |
53 | ```typescript
54 | import { HttpRequest, HttpResponseInit, InvocationContext } from '@azure/functions';
55 | import { AzureOpenAIEmbeddings } from '@langchain/azure-openai';
56 | import 'dotenv/config';
57 |
58 | export async function chat(request: HttpRequest, context: InvocationContext): Promise {
59 | context.log(`Http function processed request for url "${request.url}"`);
60 |
61 | try {
62 | const requestBody: any = await request.json();
63 |
64 | if (!requestBody?.question) {
65 | return {
66 | status: 400,
67 | jsonBody: {
68 | error: 'No question provided',
69 | },
70 | };
71 | }
72 |
73 | const { question } = requestBody;
74 |
75 | const embeddings = new AzureOpenAIEmbeddings();
76 |
77 | const prompt = `Question: ${question}`;
78 | context.log(`Sending prompt to the model: ${prompt}`);
79 |
80 | const promptResponse = await embeddings.embedQuery(prompt);
81 |
82 | if (promptResponse) {
83 | return {
84 | status: 200,
85 | jsonBody: {
86 | promptResponse,
87 | },
88 | };
89 | } else {
90 | return {
91 | status: 503,
92 | jsonBody: {
93 | error: 'Service temporarily unavailable. Please try again later.',
94 | },
95 | };
96 | }
97 | } catch (error: unknown) {
98 | const err = error as Error;
99 | context.error(`Error when processing chat request: ${err.message}`);
100 |
101 | return {
102 | status: 503,
103 | jsonBody: {
104 | error: 'Service temporarily unavailable. Please try again later.',
105 | },
106 | };
107 | }
108 | }
109 | ```
110 |
111 | Let's understand what we did:
112 |
113 | 1. We import the `AzureOpenAIEmbeddings` class from the `@langchain/azure-openai` package and the `dotenv` package to load the environment variables.
114 |
115 | 2. As this is a `POST`, we need to check if the request body contains the `question` key. If it does not, we return a 400 error.
116 |
117 | 3. We instantiate the `AzureOpenAIEmbeddings`, which will allow us to retrieve the environment variables contained in the `.env` file.
118 |
119 | 4. We create a prompt with the question received in the request. The prompt is a string that contains the question and the keyword `Question:`. This is necessary for the Azure OpenAI model to understand that we are asking a question.
120 |
121 | 5. Sending the prompt to the Azure OpenAI model and returning the response.
122 |
123 | This code will need to be refactored later. But, let's first focus on testing the `chat` function locally.
124 |
125 | ## Testing the `Chat` API
126 |
127 | Before testing the `chat` function, let's create a file called `api.http` at the root of the project with the following content:
128 |
129 | - `packages/api/api.http`
130 |
131 | ```http
132 | ##################################################################
133 | # VS Code with REST Client extension is needed to use this file.
134 | # Download at: https://aka.ms/vscode/rest-client
135 | ##################################################################
136 |
137 | @api_host = http://localhost:7071
138 |
139 | # Chat with a bot (this is a sample ---> will be change)
140 | POST {{api_host}}/api/chat
141 | Content-Type: application/json
142 |
143 | {
144 | "question": "What is the United States currency?"
145 | }
146 | ```
147 |
148 | We recommend the `REST Client` extension for Visual Studio Code. With it, you can execute HTTP requests directly from your editor.
149 |
150 | Now that we have the code ready, let's test the `chat` function locally. Run the following command:
151 |
152 | ```bash
153 | npm run start
154 | ```
155 |
156 | You should see the following message in the terminal:
157 |
158 | ```bash
159 | Worker process started and initialized.
160 |
161 | Functions:
162 |
163 | chat: [POST] http://localhost:7071/api/chat
164 | ```
165 |
166 | Now open the `api.http` file and click the `Send Request` button that will appear in the upper right corner of the file. You should see the response from the Azure OpenAI model.
167 |
168 | If the response appears as shown in the image below, congratulations! We have just created our first function using Azure OpenAI, with LangChain and Azure Functions.
169 |
170 | 
171 |
172 | Maybe you wonder: 'Why is `promptResponse` returning an array of numbers?' This happens because `embedQuery` returns an array of numbers. In NPL (Natural Language Processing), these numbers represent the vector representation of the model's response.
173 |
174 | **todo: explain better what a vector representation is**
175 |
176 | ## Refactoring the `chat` function
177 |
178 | Now that we have the `chat` function working, let's refactor the code to make it clearer and easier to maintain. You may have noticed that the code contains many `statusCode`, which ends up making the code quite repetitive!
179 |
180 | Inside the `src` folder, create a folder called `utils` and inside it create a file called: `http-helper.ts` with the following block of code:
181 |
182 | - `packages/api/src/utils/http-helper.ts`
183 |
184 | ```typescript
185 | import { HttpResponseInit } from '@azure/functions';
186 |
187 | export function badRequest(error: Error): HttpResponseInit {
188 | return {
189 | status: 400,
190 | jsonBody: {
191 | error: error.message,
192 | },
193 | };
194 | }
195 |
196 | export function notFound(error: Error): HttpResponseInit {
197 | return {
198 | status: 404,
199 | jsonBody: {
200 | error: error.message,
201 | },
202 | };
203 | }
204 |
205 | export function serviceUnavailable(error: Error): HttpResponseInit {
206 | return {
207 | status: 503,
208 | jsonBody: {
209 | error: error.message,
210 | },
211 | };
212 | }
213 |
214 | export function internalServerError(error: Error): HttpResponseInit {
215 | return {
216 | status: 500,
217 | jsonBody: {
218 | error: error.message,
219 | },
220 | };
221 | }
222 |
223 | export function unauthorized(error: Error): HttpResponseInit {
224 | return {
225 | status: 401,
226 | jsonBody: {
227 | error: error.message,
228 | },
229 | };
230 | }
231 |
232 | export function noContent(): HttpResponseInit {
233 | return {
234 | status: 204,
235 | };
236 | }
237 |
238 | export function created(body: Record): HttpResponseInit {
239 | return {
240 | status: 201,
241 | jsonBody: body,
242 | };
243 | }
244 |
245 | export function ok(body: Record): HttpResponseInit {
246 | return {
247 | status: 200,
248 | jsonBody: body,
249 | };
250 | }
251 | ```
252 |
253 | Note that we abstracted the logic of returning each status code into separate functions. This will help us to keep the code cleaner and easier to maintain.
254 |
255 | Inside that same `utils` folder, now create a file called `index.ts` with the following block of code:
256 |
257 | - `packages/api/src/utils/index.ts`
258 |
259 | ```typescript
260 | export * from './http-helper';
261 | ```
262 |
263 | Done! Now that we have the status code return functions abstracted, let's refactor the `chat.ts` function to use these functions.
264 |
265 | Open the file `chat.ts` and replace the code block with:
266 |
267 | - `packages/api/src/functions/chat.ts`
268 |
269 | ```typescript
270 | import { HttpRequest, InvocationContext, HttpResponseInit } from '@azure/functions';
271 | import { AzureOpenAIEmbeddings } from '@langchain/azure-openai';
272 | import 'dotenv/config';
273 | import { badRequest, serviceUnavailable, ok } from '../utils';
274 |
275 | export async function chat(request: HttpRequest, context: InvocationContext): Promise {
276 | context.log(`Http function processed request for url "${request.url}"`);
277 |
278 | try {
279 | const requestBody: any = await request.json();
280 |
281 | if (!requestBody?.question) {
282 | return badRequest(new Error('No question provided'));
283 | }
284 |
285 | const { question } = requestBody;
286 |
287 | const embeddings = new AzureOpenAIEmbeddings();
288 |
289 | const prompt = `Question: ${question}`;
290 | context.log(`Sending prompt to the model: ${prompt}`);
291 |
292 | const promptResponse = await embeddings.embedQuery(prompt);
293 |
294 | return promptResponse
295 | ? ok({ promptResponse })
296 | : serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
297 | } catch (error: unknown) {
298 | const error_ = error as Error;
299 | context.error(`Error when processing chat request: ${error_.message}`);
300 |
301 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
302 | }
303 | }
304 | ```
305 |
306 | Now that we've refactored the `chat` function, let's test it again locally. Run the command:
307 |
308 | ```bash
309 | npm run start
310 | ```
311 |
312 | Open the `api.http` file and click the `Send Request` button. You should see the response from the Azure OpenAI template. If everything went well, congratulations! We've refactored the `chat` function and made it cleaner and easier to maintain. 🎉
313 |
314 | At this point, let's stop implementing the `chat` function, we'll come back to it later.
315 |
316 | In the next step, we will start creating the use of `CosmosDB LC vector store` to store the vectors generated by Azure OpenAI.
317 |
318 | ▶ **[Next Step: Init `CosmosDB LC Vector Store` in the project](./02-session.md)**
319 |
--------------------------------------------------------------------------------
/docs/old-tutorial/05-session.md:
--------------------------------------------------------------------------------
1 | # Generate a stream response in the `chat` API
2 |
3 | In this session, we will learn how to generate a stream response in the `chat` API, using LangChain.js and including the new feature on stream available also for the v4 of the Azure Functions programming model.
4 |
5 | ## What is streaming?
6 |
7 | Streaming is crucial for Large Language models (LLMs) for several reasons:
8 |
9 | - **It manages memory resources efficiently**: allowing models to process long texts without overloading memory.
10 | - **It improves scalability**: making it easier to process inputs of virtually unlimited size.
11 | - **Reduces latency in real-time interactions**: providing faster responses in virtual assistants and dialog systems.
12 | - **Facilitates training and inference** on large data sets, making the use of LLMs more practical and efficient.
13 | - **It can improve the quality of the text generated**: helping models to focus on smaller pieces of text for greater cohesion and contextual relevance.
14 | - **Supports distributed workflows**: allowing models to be scaled to meet intense processing demands.
15 |
16 | As such, certain large language models (LLMs) have the ability to send responses sequentially. This means that you don't need to wait for the full response to be received before you can start working with it. This feature is especially advantageous if you want to show the response to the user as it is produced or if you need to analyze and use the response while it is being formed.
17 |
18 | And LangChain.js supports the use of streaming. Making use of the `.stream()` method. If you want to know more about the `.stream()` method, you can access the **[official LangChain.js documentation](https://js.langchain.com/docs/use_cases/question_answering/streaming#chain-with-sources)**.
19 |
20 | ## Support for HTTP Streams in Azure Functions
21 |
22 | The Azure Functions product team recently announced the availability of support for HTTP Streams in version 4 of the Azure Functions programming model. With this, it is now possible to return stream responses in HTTP APIs, which is especially useful for real-time data streaming scenarios.
23 |
24 | To find out more about streaming support in Azure Functions v4, you can visit Microsoft's Tech Community blog by clicking **[here](https://techcommunity.microsoft.com/t5/apps-on-azure-blog/azure-functions-support-for-http-streams-in-node-js-is-now-in/ba-p/4066575)**.
25 |
26 | ## Enabling HTTP Streams support in Azure Functions
27 |
28 | Well, now that we've understood the importance of using streaming in a chat and how useful it can be, let's learn how we can introduce it into the `chat` API.
29 |
30 | The first thing we need to do is enable the new Azure Functions feature, which is streaming support. To do this, open the file `chat.ts` and include the following code:
31 |
32 | - `api/functions/chat.ts`
33 |
34 | ```typescript
35 | (... previous code ...)
36 |
37 | app.setup({ enableHttpStream: true });
38 | app.post('chat', {
39 | route: 'chat',
40 | authLevel: 'anonymous',
41 | handler: chat,
42 | });
43 | ```
44 |
45 | So the `chat.ts` file will look like this:
46 |
47 | - `api/functions/chat.ts`
48 |
49 | ```typescript
50 | import { Readable } from 'node:stream';
51 | import { Document } from '@langchain/core/documents';
52 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
53 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai';
54 | import { ChatPromptTemplate } from '@langchain/core/prompts';
55 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
56 | import { AzureCosmosDBVectorStore } from '@langchain/community/vectorstores/azure_cosmosdb';
57 | import { createRetrievalChain } from 'langchain/chains/retrieval';
58 | import 'dotenv/config';
59 | import { badRequest, serviceUnavailable } from '../utils';
60 |
61 | export async function chat(request: HttpRequest, context: InvocationContext): Promise {
62 | context.log(`Http function processed request for url "${request.url}"`);
63 |
64 | try {
65 | const requestBody: any = await request.json();
66 |
67 | if (!requestBody?.question) {
68 | return badRequest(new Error('No question provided'));
69 | }
70 |
71 | const { question } = requestBody;
72 |
73 | const embeddings = new AzureOpenAIEmbeddings();
74 |
75 | const prompt = `Question: ${question}`;
76 | context.log(`Sending prompt to the model: ${prompt}`);
77 |
78 | const model = new AzureChatOpenAI();
79 |
80 | const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
81 | ['system', "Answer the user's questions based on the below context:\n\n{context}"],
82 | ['human', '{input}'],
83 | ]);
84 |
85 | const combineDocsChain = await createStuffDocumentsChain({
86 | llm: model,
87 | prompt: questionAnsweringPrompt,
88 | });
89 |
90 | const store = new AzureCosmosDBVectorStore(embeddings, {});
91 |
92 | const chain = await createRetrievalChain({
93 | retriever: store.asRetriever(),
94 | combineDocsChain,
95 | });
96 |
97 | const response = await chain.stream({
98 | input: question,
99 | });
100 |
101 | return {
102 | headers: { 'Content-Type': 'text/plain' },
103 | body: createStream(response),
104 | };
105 | } catch (error: unknown) {
106 | const error_ = error as Error;
107 | context.error(`Error when processing chat request: ${error_.message}`);
108 |
109 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
110 | }
111 | }
112 |
113 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) {
114 | const buffer = new Readable({
115 | read() {},
116 | });
117 |
118 | const stream = async () => {
119 | for await (const chunk of chunks) {
120 | buffer.push(chunk.answer);
121 | }
122 |
123 | buffer.push(null);
124 | };
125 |
126 | stream();
127 |
128 | return buffer;
129 | }
130 |
131 | app.setup({ enableHttpStream: true });
132 | app.post('chat', {
133 | route: 'chat',
134 | authLevel: 'anonymous',
135 | handler: chat,
136 | });
137 | ```
138 |
139 | And that's it! Azure Functions is now enabled to support streaming.
140 |
141 | ## Generating a stream response in the `chat` API
142 |
143 | Now, let's move on and create the logic to generate a stream response in the `chat` API.
144 |
145 | Open the `chat.ts` file and let's make some significant changes:
146 |
147 | - `chat.ts`
148 |
149 | ```typescript
150 | import { Readable } from 'node:stream';
151 | import { Document } from '@langchain/core/documents';
152 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
153 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai';
154 | import { ChatPromptTemplate } from '@langchain/core/prompts';
155 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
156 | import { AzureCosmosDBVectorStore } from '@langchain/community/vectorstores/azure_cosmosdb';
157 | import { createRetrievalChain } from 'langchain/chains/retrieval';
158 | import 'dotenv/config';
159 | import { badRequest, serviceUnavailable } from '../utils';
160 |
161 | export async function chat(request: HttpRequest, context: InvocationContext): Promise {
162 | try {
163 | const requestBody: any = await request.json();
164 |
165 | if (!requestBody?.question) {
166 | return badRequest(new Error('No question provided'));
167 | }
168 |
169 | const { question } = requestBody;
170 |
171 | const embeddings = new AzureOpenAIEmbeddings();
172 |
173 | const prompt = `Question: ${question}`;
174 | context.log(`Sending prompt to the model: ${prompt}`);
175 |
176 | const model = new AzureChatOpenAI();
177 |
178 | const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
179 | ['system', "Answer the user's questions based on the below context:\n\n{context}"],
180 | ['human', '{input}'],
181 | ]);
182 |
183 | const combineDocsChain = await createStuffDocumentsChain({
184 | llm: model,
185 | prompt: questionAnsweringPrompt,
186 | });
187 |
188 | const store = new AzureCosmosDBVectorStore(embeddings, {});
189 |
190 | const chain = await createRetrievalChain({
191 | retriever: store.asRetriever(),
192 | combineDocsChain,
193 | });
194 |
195 | const response = await chain.stream({
196 | input: question,
197 | });
198 |
199 | return {
200 | body: createStream(response),
201 | headers: {
202 | 'Content-Type': 'text/plain',
203 | },
204 | };
205 | } catch (error: unknown) {
206 | const error_ = error as Error;
207 | context.error(`Error when processing chat request: ${error_.message}`);
208 |
209 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
210 | }
211 | }
212 |
213 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) {
214 | const buffer = new Readable({
215 | read() {},
216 | });
217 |
218 | const stream = async () => {
219 | for await (const chunk of chunks) {
220 | buffer.push(chunk.answer);
221 | }
222 |
223 | buffer.push(null);
224 | };
225 |
226 | stream();
227 |
228 | return buffer;
229 | }
230 |
231 | app.setup({ enableHttpStream: true });
232 | app.post('chat', {
233 | route: 'chat',
234 | authLevel: 'anonymous',
235 | handler: chat,
236 | });
237 | ```
238 |
239 | Several changes here, right? But let's understand what has been changed and included here:
240 |
241 | ```typescript
242 | const response = await chain.stream({
243 | input: question,
244 | });
245 | ```
246 |
247 | Before, the `chain` variable was using the `invoke()` method. However, as we now want to generate a stream response, we are using the `stream()` method. And passing the `input` parameter with the question the user asked.
248 |
249 | After that, we're returning the stream response, using the `createStream()` function.
250 |
251 | ```typescript
252 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) {
253 | const buffer = new Readable({
254 | read() {},
255 | });
256 |
257 | const stream = async () => {
258 | for await (const chunk of chunks) {
259 | buffer.push(chunk.answer);
260 | }
261 |
262 | buffer.push(null);
263 | };
264 |
265 | stream();
266 |
267 | return buffer;
268 | }
269 |
270 | app.setup({ enableHttpStream: true });
271 | app.post('chat', {
272 | route: 'chat',
273 | authLevel: 'anonymous',
274 | handler: chat,
275 | });
276 | ```
277 |
278 | The `createStream()` function is responsible for generating the stream response. It receives an `AsyncIterable` of `{ context: Document[]; answer: string }` as a parameter. And then creates a `Readable` stream, which is an interface for reading data from a stream.
279 |
280 | Note that we are importing:
281 |
282 | - `Document` from the `@langchain/core/documents` package: which is an interface for interacting with a document.
283 | - `Readable` from the `node:stream` package: class that belongs to the `stream` module of Node.js, which is an interface for reading data from a stream.
284 |
285 | ```typescript
286 | return {
287 | headers: { 'Content-Type': 'text/plain' },
288 | body: createStream(response),
289 | };
290 | ```
291 |
292 | And finally, we're returning the stream response using the `createStream()` function. And setting the `Content-Type` header to `text/plain`.
293 |
294 | And that's it! Now the `chat` API is ready to generate stream responses.
295 |
296 | Let's test the `chat` API and see how it behaves when generating a stream response. To do this, open the terminal again in the `api` folder and run the command:
297 |
298 | ```bash
299 | npm run start
300 | ```
301 |
302 | And then open the `api.http` file and send the `chat` API request, and now see the return of the response using the stream in the gif below:
303 |
304 | 
305 |
306 | Note that when we send the request, the `Response` header shows `Transfer-Encoding: chunked`, which indicates that the response is being sent in chunks. And the response is displayed sequentially, i.e. as the response is generated, it is displayed.
307 |
308 | 
309 |
310 | And that's it! You've now learned how to generate a stream response in the `chat` API using LangChain.js and the new stream feature that is also available for v4 of the Azure Functions programming model.
311 |
--------------------------------------------------------------------------------
/infra/main.bicep:
--------------------------------------------------------------------------------
1 | targetScope = 'subscription'
2 |
3 | @minLength(1)
4 | @maxLength(64)
5 | @description('Name of the the environment which is used to generate a short unique hash used in all resources.')
6 | param environmentName string
7 |
8 | @minLength(1)
9 | @description('Primary location for all resources')
10 | param location string
11 |
12 | param resourceGroupName string = ''
13 | param webappName string = 'webapp'
14 | param apiServiceName string = 'api'
15 | param appServicePlanName string = ''
16 | param storageAccountName string = ''
17 | param cosmosDbServiceName string = ''
18 |
19 | @description('Location for the OpenAI resource group')
20 | @allowed(['australiaeast', 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'japaneast', 'northcentralus', 'swedencentral', 'switzerlandnorth', 'uksouth', 'westeurope'])
21 | @metadata({
22 | azd: {
23 | type: 'location'
24 | }
25 | })
26 | param openAiLocation string // Set in main.parameters.json
27 | param openAiSkuName string = 'S0'
28 | param openAiUrl string = ''
29 | param openAiApiVersion string // Set in main.parameters.json
30 |
31 | // Location is not relevant here as it's only for the built-in api
32 | // which is not used here. Static Web App is a global service otherwise
33 | @description('Location for the Static Web App')
34 | @allowed(['westus2', 'centralus', 'eastus2', 'westeurope', 'eastasia', 'eastasiastage'])
35 | @metadata({
36 | azd: {
37 | type: 'location'
38 | }
39 | })
40 | param webappLocation string // Set in main.parameters.json
41 |
42 | param chatModelName string // Set in main.parameters.json
43 | param chatDeploymentName string = chatModelName
44 | param chatModelVersion string // Set in main.parameters.json
45 | param chatDeploymentCapacity int = 15
46 | param embeddingsModelName string // Set in main.parameters.json
47 | param embeddingsModelVersion string // Set in main.parameters.json
48 | param embeddingsDeploymentName string = embeddingsModelName
49 | param embeddingsDeploymentCapacity int = 30
50 |
51 | param blobContainerName string = 'files'
52 |
53 | // Id of the user or app to assign application roles
54 | param principalId string = ''
55 |
56 | // Enable enhanced security with VNet integration
57 | param useVnet bool // Set in main.parameters.json
58 |
59 | // Differentiates between automated and manual deployments
60 | param isContinuousDeployment bool // Set in main.parameters.json
61 |
62 | var abbrs = loadJsonContent('abbreviations.json')
63 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location))
64 | var tags = { 'azd-env-name': environmentName }
65 | var finalOpenAiUrl = empty(openAiUrl) ? 'https://${openAi.outputs.name}.openai.azure.com' : openAiUrl
66 | var storageUrl = 'https://${storage.outputs.name}.blob.${environment().suffixes.storage}'
67 | var apiResourceName = '${abbrs.webSitesFunctions}api-${resourceToken}'
68 |
69 | // Organize resources in a resource group
70 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
71 | name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}'
72 | location: location
73 | tags: tags
74 | }
75 |
76 | // The application webapp
77 | module webapp './core/host/staticwebapp.bicep' = {
78 | name: 'webapp'
79 | scope: resourceGroup
80 | params: {
81 | name: !empty(webappName) ? webappName : '${abbrs.webStaticSites}web-${resourceToken}'
82 | location: webappLocation
83 | tags: union(tags, { 'azd-service-name': webappName })
84 | sku: useVnet ? {
85 | name: 'Standard'
86 | tier: 'Standard'
87 | } : {
88 | name: 'Free'
89 | tier: 'Free'
90 | }
91 | }
92 | }
93 |
94 | // The application backend API
95 | module api './app/api.bicep' = {
96 | name: 'api'
97 | scope: resourceGroup
98 | params: {
99 | name: apiResourceName
100 | location: location
101 | tags: union(tags, { 'azd-service-name': apiServiceName })
102 | appServicePlanId: appServicePlan.outputs.id
103 | allowedOrigins: [webapp.outputs.uri]
104 | storageAccountName: storage.outputs.name
105 | applicationInsightsName: monitoring.outputs.applicationInsightsName
106 | virtualNetworkSubnetId: useVnet ? vnet.outputs.appSubnetID : ''
107 | staticWebAppName: webapp.outputs.name
108 | appSettings: {
109 | APPINSIGHTS_INSTRUMENTATIONKEY: monitoring.outputs.applicationInsightsInstrumentationKey
110 | AZURE_OPENAI_API_INSTANCE_NAME: openAi.outputs.name
111 | AZURE_OPENAI_API_ENDPOINT: finalOpenAiUrl
112 | AZURE_OPENAI_API_VERSION: openAiApiVersion
113 | AZURE_OPENAI_API_DEPLOYMENT_NAME: chatDeploymentName
114 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME: embeddingsDeploymentName
115 | AZURE_COSMOSDB_NOSQL_ENDPOINT: cosmosDb.outputs.endpoint
116 | AZURE_STORAGE_URL: storageUrl
117 | AZURE_STORAGE_CONTAINER_NAME: blobContainerName
118 | }
119 | }
120 | dependsOn: empty(openAiUrl) ? [] : [openAi]
121 | }
122 |
123 | // Compute plan for the Azure Functions API
124 | module appServicePlan './core/host/appserviceplan.bicep' = {
125 | name: 'appserviceplan'
126 | scope: resourceGroup
127 | params: {
128 | name: !empty(appServicePlanName) ? appServicePlanName : '${abbrs.webServerFarms}${resourceToken}'
129 | location: location
130 | tags: tags
131 | sku: useVnet ? {
132 | name: 'FC1'
133 | tier: 'FlexConsumption'
134 | } : {
135 | name: 'Y1'
136 | tier: 'Dynamic'
137 | }
138 | reserved: useVnet ? true : null
139 | }
140 | }
141 |
142 | // Storage for Azure Functions API and Blob storage
143 | module storage './core/storage/storage-account.bicep' = {
144 | name: 'storage'
145 | scope: resourceGroup
146 | params: {
147 | name: !empty(storageAccountName) ? storageAccountName : '${abbrs.storageStorageAccounts}${resourceToken}'
148 | location: location
149 | tags: tags
150 | allowBlobPublicAccess: false
151 | allowSharedKeyAccess: !useVnet
152 | containers: concat([
153 | {
154 | name: blobContainerName
155 | publicAccess: 'None'
156 | }
157 | ], useVnet ? [
158 | // Deployment storage container
159 | {
160 | name: apiResourceName
161 | }
162 | ] : [])
163 | networkAcls: useVnet ? {
164 | defaultAction: 'Deny'
165 | bypass: 'AzureServices'
166 | virtualNetworkRules: [
167 | {
168 | id: vnet.outputs.appSubnetID
169 | action: 'Allow'
170 | }
171 | ]
172 | } : {
173 | bypass: 'AzureServices'
174 | defaultAction: 'Allow'
175 | }
176 | }
177 | }
178 |
179 | // Virtual network for Azure Functions API
180 | module vnet './app/vnet.bicep' = if (useVnet) {
181 | name: 'vnet'
182 | scope: resourceGroup
183 | params: {
184 | name: '${abbrs.networkVirtualNetworks}${resourceToken}'
185 | location: location
186 | tags: tags
187 | }
188 | }
189 |
190 | // Monitor application with Azure Monitor
191 | module monitoring './core/monitor/monitoring.bicep' = {
192 | name: 'monitoring'
193 | scope: resourceGroup
194 | params: {
195 | location: location
196 | tags: tags
197 | logAnalyticsName: '${abbrs.operationalInsightsWorkspaces}${resourceToken}'
198 | applicationInsightsName: '${abbrs.insightsComponents}${resourceToken}'
199 | applicationInsightsDashboardName: '${abbrs.portalDashboards}${resourceToken}'
200 | }
201 | }
202 |
203 | module openAi 'core/ai/cognitiveservices.bicep' = if (empty(openAiUrl)) {
204 | name: 'openai'
205 | scope: resourceGroup
206 | params: {
207 | name: '${abbrs.cognitiveServicesAccounts}${resourceToken}'
208 | location: openAiLocation
209 | tags: tags
210 | sku: {
211 | name: openAiSkuName
212 | }
213 | disableLocalAuth: true
214 | deployments: [
215 | {
216 | name: chatDeploymentName
217 | model: {
218 | format: 'OpenAI'
219 | name: chatModelName
220 | version: chatModelVersion
221 | }
222 | sku: {
223 | name: 'GlobalStandard'
224 | capacity: chatDeploymentCapacity
225 | }
226 | }
227 | {
228 | name: embeddingsDeploymentName
229 | model: {
230 | format: 'OpenAI'
231 | name: embeddingsModelName
232 | version: embeddingsModelVersion
233 | }
234 | capacity: embeddingsDeploymentCapacity
235 | }
236 | ]
237 | }
238 | }
239 |
240 | module cosmosDb 'br/public:avm/res/document-db/database-account:0.9.0' = {
241 | name: 'cosmosDb'
242 | scope: resourceGroup
243 | params: {
244 | name: !empty(cosmosDbServiceName) ? cosmosDbServiceName : '${abbrs.documentDBDatabaseAccounts}${resourceToken}'
245 | tags: tags
246 | locations: [
247 | {
248 | locationName: location
249 | failoverPriority: 0
250 | isZoneRedundant: false
251 | }
252 | ]
253 | managedIdentities: {
254 | systemAssigned: true
255 | }
256 | capabilitiesToAdd: [
257 | 'EnableServerless'
258 | 'EnableNoSQLVectorSearch'
259 | ]
260 | networkRestrictions: {
261 | ipRules: []
262 | virtualNetworkRules: []
263 | publicNetworkAccess: 'Enabled'
264 | }
265 | sqlDatabases: [
266 | {
267 | containers: [
268 | {
269 | name: 'vectorSearchContainer'
270 | paths: [
271 | '/id'
272 | ]
273 | }
274 | ]
275 | name: 'vectorSearchDB'
276 | }
277 | {
278 | containers: [
279 | {
280 | name: 'chatHistoryContainer'
281 | paths: [
282 | '/userId'
283 | ]
284 | }
285 | ]
286 | name: 'chatHistoryDB'
287 | }
288 | ]
289 | }
290 | }
291 |
292 | module dbRoleDefinition './core/database/cosmos/sql/cosmos-sql-role-def.bicep' = {
293 | scope: resourceGroup
294 | name: 'db-contrib-role-definition'
295 | params: {
296 | accountName: cosmosDb.outputs.name
297 | }
298 | }
299 |
300 |
301 | // Managed identity roles assignation
302 | // ---------------------------------------------------------------------------
303 |
304 | // User roles
305 | module openAiRoleUser 'core/security/role.bicep' = if (!isContinuousDeployment) {
306 | scope: resourceGroup
307 | name: 'openai-role-user'
308 | params: {
309 | principalId: principalId
310 | // Cognitive Services OpenAI User
311 | roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
312 | principalType: 'User'
313 | }
314 | }
315 |
316 | module storageRoleUser 'core/security/role.bicep' = if (!isContinuousDeployment) {
317 | scope: resourceGroup
318 | name: 'storage-contrib-role-user'
319 | params: {
320 | principalId: principalId
321 | // Storage Blob Data Contributor
322 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe'
323 | principalType: 'User'
324 | }
325 | }
326 |
327 | module dbContribRoleUser './core/database/cosmos/sql/cosmos-sql-role-assign.bicep' = if (!isContinuousDeployment) {
328 | scope: resourceGroup
329 | name: 'db-contrib-role-user'
330 | params: {
331 | accountName: cosmosDb.outputs.name
332 | principalId: principalId
333 | // Cosmos DB Data Contributor
334 | roleDefinitionId: dbRoleDefinition.outputs.id
335 | }
336 | }
337 |
338 | // System roles
339 | module openAiRoleApi 'core/security/role.bicep' = {
340 | scope: resourceGroup
341 | name: 'openai-role-api'
342 | params: {
343 | principalId: api.outputs.identityPrincipalId
344 | // Cognitive Services OpenAI User
345 | roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
346 | principalType: 'ServicePrincipal'
347 | }
348 | }
349 |
350 | module storageRoleApi 'core/security/role.bicep' = {
351 | scope: resourceGroup
352 | name: 'storage-role-api'
353 | params: {
354 | principalId: api.outputs.identityPrincipalId
355 | // Storage Blob Data Contributor
356 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe'
357 | principalType: 'ServicePrincipal'
358 | }
359 | }
360 |
361 | module dbContribRoleApi './core/database/cosmos/sql/cosmos-sql-role-assign.bicep' = {
362 | scope: resourceGroup
363 | name: 'db-contrib-role-api'
364 | params: {
365 | accountName: cosmosDb.outputs.name
366 | principalId: api.outputs.identityPrincipalId
367 | // Cosmos DB Data Contributor
368 | roleDefinitionId: dbRoleDefinition.outputs.id
369 | }
370 | }
371 |
372 | output AZURE_LOCATION string = location
373 | output AZURE_TENANT_ID string = tenant().tenantId
374 | output AZURE_RESOURCE_GROUP string = resourceGroup.name
375 |
376 | output AZURE_OPENAI_API_ENDPOINT string = finalOpenAiUrl
377 | output AZURE_OPENAI_API_INSTANCE_NAME string = openAi.outputs.name
378 | output AZURE_OPENAI_API_VERSION string = openAiApiVersion
379 | output AZURE_OPENAI_API_DEPLOYMENT_NAME string = chatDeploymentName
380 | output AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME string = embeddingsDeploymentName
381 | output AZURE_STORAGE_URL string = storageUrl
382 | output AZURE_STORAGE_CONTAINER_NAME string = blobContainerName
383 | output AZURE_COSMOSDB_NOSQL_ENDPOINT string = cosmosDb.outputs.endpoint
384 |
385 | output API_URL string = useVnet ? '' : api.outputs.uri
386 | output WEBAPP_URL string = webapp.outputs.uri
387 | output UPLOAD_URL string = useVnet ? webapp.outputs.uri : api.outputs.uri
388 |
--------------------------------------------------------------------------------