├── docs ├── old-tutorial │ ├── 06-session.md │ ├── images │ │ ├── chat-api.png │ │ ├── stream-response.gif │ │ ├── upload-function.png │ │ ├── chat-final-result.gif │ │ ├── post-request-chat.png │ │ ├── chat-stream-response.png │ │ ├── from-messages-method.png │ │ └── test-upload-function.gif │ ├── 02-session.md │ ├── 04-session.md │ ├── 01-session.md │ └── 05-session.md ├── tutorial │ ├── 05-config-files-app.md │ ├── images │ │ ├── rag.png │ │ ├── deployed-app.png │ │ ├── ollama-page.png │ │ ├── mistral-7b-page.png │ │ ├── application-webapp.png │ │ ├── azure-openai-page.png │ │ ├── ollama-mistra-cli.png │ │ ├── function-chat-error.png │ │ ├── init-functions-project.png │ │ ├── services-azure-portal.png │ │ └── azure-functions-project-structure.png │ ├── 03-understanding-rag.md │ ├── 01-introduction.md │ └── 04-preparing-understanding-language-models.md ├── images │ ├── demo.gif │ ├── rag.png │ ├── azd-up.png │ ├── clone-url.png │ ├── codespaces.png │ ├── architecture.drawio.png │ └── architecture-local.drawio.png ├── cost.md ├── enhance-security.md ├── troubleshooting.md ├── faq.md └── readme.md ├── packages ├── webapp │ ├── src │ │ ├── vite-env.d.ts │ │ ├── index.ts │ │ ├── api.ts │ │ └── message-parser.ts │ ├── public │ │ ├── favicon.png │ │ └── staticwebapp.config.json │ ├── assets │ │ ├── panel.svg │ │ ├── send.svg │ │ ├── delete.svg │ │ ├── new-chat.svg │ │ └── question.svg │ ├── README.md │ ├── package.json │ ├── vite.config.ts │ ├── tsconfig.json │ └── index.html └── api │ ├── local.settings.json │ ├── .funcignore │ ├── src │ ├── constants.ts │ ├── http-response.ts │ ├── security.ts │ └── functions │ │ ├── chats-delete.ts │ │ ├── chats-get.ts │ │ ├── documents-get.ts │ │ ├── documents-post.ts │ │ └── chats-post.ts │ ├── host.json │ ├── tsconfig.json │ ├── .env.sample │ ├── README.md │ ├── api.http │ ├── package.json │ └── .gitignore ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── data ├── support.pdf ├── privacy-policy.pdf ├── terms-of-service.pdf └── README.md ├── .gitignore ├── .editorconfig ├── .github ├── CODE_OF_CONDUCT.md ├── workflows │ ├── stale-bot.yaml │ ├── validate-infra.yaml │ ├── build-test.yaml │ └── azure-dev.yaml ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md ├── SECURITY.md └── CONTRIBUTING.md ├── infra ├── app │ ├── linked-backend.bicep │ ├── api.bicep │ └── vnet.bicep ├── core │ ├── host │ │ ├── staticwebapp.bicep │ │ ├── appserviceplan.bicep │ │ ├── appservice-appsettings.bicep │ │ ├── functions.bicep │ │ ├── appservice.bicep │ │ └── functions-flex.bicep │ ├── monitor │ │ ├── loganalytics.bicep │ │ ├── applicationinsights.bicep │ │ └── monitoring.bicep │ ├── database │ │ └── cosmos │ │ │ └── sql │ │ │ ├── cosmos-sql-role-assign.bicep │ │ │ └── cosmos-sql-role-def.bicep │ ├── security │ │ └── role.bicep │ ├── ai │ │ └── cognitiveservices.bicep │ └── storage │ │ └── storage-account.bicep ├── main.parameters.json ├── abbreviations.json └── main.bicep ├── SUPPORT.md ├── azure.yaml ├── LICENSE ├── .devcontainer └── devcontainer.json ├── scripts └── upload-documents.js ├── AGENTS.md └── package.json /docs/old-tutorial/06-session.md: -------------------------------------------------------------------------------- 1 | # `get-documents.ts` 2 | 3 | to-do 4 | -------------------------------------------------------------------------------- /packages/webapp/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | 
-------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["ms-azuretools.vscode-azurefunctions"] 3 | } 4 | -------------------------------------------------------------------------------- /data/support.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/support.pdf -------------------------------------------------------------------------------- /docs/tutorial/05-config-files-app.md: -------------------------------------------------------------------------------- 1 | # Preparing the Configuration Files for the `chat` API 2 | 3 | **todo** 4 | -------------------------------------------------------------------------------- /docs/images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/demo.gif -------------------------------------------------------------------------------- /docs/images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/rag.png -------------------------------------------------------------------------------- /data/privacy-policy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/privacy-policy.pdf -------------------------------------------------------------------------------- /docs/images/azd-up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/azd-up.png -------------------------------------------------------------------------------- /data/terms-of-service.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/data/terms-of-service.pdf -------------------------------------------------------------------------------- /docs/images/clone-url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/clone-url.png -------------------------------------------------------------------------------- /docs/images/codespaces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/codespaces.png -------------------------------------------------------------------------------- /docs/tutorial/images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/rag.png -------------------------------------------------------------------------------- /docs/images/architecture.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/architecture.drawio.png 
-------------------------------------------------------------------------------- /packages/webapp/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/packages/webapp/public/favicon.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/chat-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-api.png -------------------------------------------------------------------------------- /docs/tutorial/images/deployed-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/deployed-app.png -------------------------------------------------------------------------------- /docs/tutorial/images/ollama-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/ollama-page.png -------------------------------------------------------------------------------- /docs/tutorial/images/mistral-7b-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/mistral-7b-page.png -------------------------------------------------------------------------------- /docs/images/architecture-local.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/images/architecture-local.drawio.png -------------------------------------------------------------------------------- /docs/tutorial/images/application-webapp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/application-webapp.png -------------------------------------------------------------------------------- /docs/tutorial/images/azure-openai-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/azure-openai-page.png -------------------------------------------------------------------------------- /docs/tutorial/images/ollama-mistra-cli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/ollama-mistra-cli.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/stream-response.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/stream-response.gif -------------------------------------------------------------------------------- /docs/old-tutorial/images/upload-function.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/upload-function.png -------------------------------------------------------------------------------- /docs/tutorial/images/function-chat-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/function-chat-error.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/chat-final-result.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-final-result.gif -------------------------------------------------------------------------------- /docs/old-tutorial/images/post-request-chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/post-request-chat.png -------------------------------------------------------------------------------- /docs/tutorial/images/init-functions-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/init-functions-project.png -------------------------------------------------------------------------------- /docs/tutorial/images/services-azure-portal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/services-azure-portal.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/chat-stream-response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/chat-stream-response.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/from-messages-method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/from-messages-method.png -------------------------------------------------------------------------------- /docs/old-tutorial/images/test-upload-function.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/old-tutorial/images/test-upload-function.gif -------------------------------------------------------------------------------- /packages/webapp/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from './api.js'; 2 | export * from './components/chat.js'; 3 | export * from './components/history.js'; 4 | export * from './message-parser.js'; 5 | -------------------------------------------------------------------------------- /docs/tutorial/images/azure-functions-project-structure.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/serverless-chat-langchainjs/HEAD/docs/tutorial/images/azure-functions-project-structure.png -------------------------------------------------------------------------------- /packages/api/local.settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "FUNCTIONS_WORKER_RUNTIME": "node", 5 | "AzureWebJobsFeatureFlags": "EnableWorkerIndexing" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /packages/webapp/public/staticwebapp.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingSlash": "auto", 3 | "navigationFallback": { 4 | "rewrite": "index.html", 5 | "exclude": ["/assets/*", "*.css"] 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /packages/api/.funcignore: -------------------------------------------------------------------------------- 1 | *.js.map 2 | .git* 3 | .vscode 4 | __azurite_db*__.json 5 | __blobstorage__ 6 | __queuestorage__ 7 | node_modules/ 8 | local.settings.json 9 | test 10 | .faiss 11 | api.http 12 | .env.sample 13 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Attach to Node Functions", 6 | "type": "node", 7 | "request": "attach", 8 | "port": 9229, 9 | "preLaunchTask": "func: host start" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled output 2 | node_modules/ 3 | dist/ 4 | .tmp/ 5 | 6 | # Logs 7 | logs 8 | *.log 9 | npm-debug.log* 10 | pnpm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | 14 | # Deployment 15 | *.env 16 | .azure 17 | 18 | # Misc 19 | .DS_Store 20 | Thumbs.db 21 | TODO 22 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # Editor configuration, see http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | indent_style = space 8 | indent_size = 2 9 | insert_final_newline = true 10 | trim_trailing_whitespace = true 11 | 12 | [*.md] 13 | max_line_length = off 14 | trim_trailing_whitespace = false 15 | -------------------------------------------------------------------------------- /packages/api/src/constants.ts: -------------------------------------------------------------------------------- 1 | // Ollama models configuration 2 | // You can see the complete list of available models at https://ollama.ai/models 3 | export const ollamaEmbeddingsModel = 'nomic-embed-text:latest'; 4 | export const ollamaChatModel = 'llama3.1:latest'; 5 | 6 | // Faiss local store folder 7 | export const faissStoreFolder = '.faiss'; 8 | -------------------------------------------------------------------------------- /packages/api/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": 
"Microsoft.Azure.Functions.ExtensionBundle", 13 | "version": "[4.*, 5.0.0)" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /packages/webapp/assets/panel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.deploySubpath": "packages/api", 3 | "azureFunctions.postDeployTask": "npm install (functions)", 4 | "azureFunctions.projectLanguage": "TypeScript", 5 | "azureFunctions.projectRuntime": "~4", 6 | "debug.internalConsoleOptions": "neverOpen", 7 | "azureFunctions.projectLanguageModel": 3, 8 | "azureFunctions.projectSubpath": "packages/api", 9 | "azureFunctions.preDeployTask": "npm prune (functions)" 10 | } 11 | -------------------------------------------------------------------------------- /packages/api/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "CommonJS", 4 | "target": "ESNext", 5 | "incremental": true, 6 | "composite": true, 7 | "skipLibCheck": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "outDir": "dist", 10 | "rootDir": ".", 11 | "sourceMap": true, 12 | "strict": true, 13 | "moduleResolution": "node", 14 | "esModuleInterop": true, 15 | "lib": ["ESNext"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /packages/webapp/README.md: -------------------------------------------------------------------------------- 1 | # Chat webapp 2 | 3 | This project uses [Vite](https://vitejs.dev/) as a frontend build tool, and [Lit](https://lit.dev/) as a web components library. 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm run dev` 10 | 11 | To start the app in dev mode. 12 | Open [http://localhost:8000](http://localhost:8000) to view it in the browser. 13 | 14 | ### `npm run build` 15 | 16 | To build the app for production to the `dist` folder. 
17 | -------------------------------------------------------------------------------- /infra/app/linked-backend.bicep: -------------------------------------------------------------------------------- 1 | param staticWebAppName string 2 | param backendResourceId string 3 | param backendLocation string 4 | 5 | resource staticWebApp 'Microsoft.Web/staticSites@2023-12-01' existing = { 6 | name: staticWebAppName 7 | } 8 | 9 | resource linkedStaticWebAppBackend 'Microsoft.Web/staticSites/linkedBackends@2023-12-01' = { 10 | parent: staticWebApp 11 | name: 'linkedBackend' 12 | properties: { 13 | backendResourceId: backendResourceId 14 | region: backendLocation 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Data disclaimer 2 | 3 | The documents used in this sample contain information generated using a language model (Azure OpenAI Service). The information contained in these documents is only for demonstration purposes and does not reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, about the completeness, accuracy, reliability, suitability or availability with respect to the information contained in this document. All rights reserved to Microsoft. 4 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please use GitHub Issues and tag them with the 10 | **question** label. 11 | 12 | ## Microsoft Support Policy 13 | 14 | Support for this project is limited to the resources listed above. 15 | -------------------------------------------------------------------------------- /packages/api/.env.sample: -------------------------------------------------------------------------------- 1 | # Azure OpenAI configuration 2 | AZURE_OPENAI_API_ENDPOINT="" 3 | AZURE_OPENAI_API_KEY="" 4 | AZURE_OPENAI_API_DEPLOYMENT_NAME="" 5 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME="" 6 | 7 | # Azure CosmosDB for MongoDB vCore configuration 8 | AZURE_COSMOSDB_CONNECTION_STRING="" 9 | 10 | # Azure Blob Storage configuration 11 | AZURE_STORAGE_CONNECTION_STRING="" 12 | AZURE_STORAGE_CONTAINER_NAME="" 13 | -------------------------------------------------------------------------------- /infra/core/host/staticwebapp.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Static Web Apps instance.' 
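// The Free tier default below is fine for the standard deployment; note that the VNet option described in docs/enhance-security.md requires switching the Static Web App to a paid plan.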
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | param sku object = { 7 | name: 'Free' 8 | tier: 'Free' 9 | } 10 | 11 | resource web 'Microsoft.Web/staticSites@2023-12-01' = { 12 | name: name 13 | location: location 14 | tags: tags 15 | sku: sku 16 | properties: { 17 | provider: 'Custom' 18 | } 19 | } 20 | 21 | output name string = web.name 22 | output uri string = 'https://${web.properties.defaultHostname}' 23 | -------------------------------------------------------------------------------- /infra/core/host/appserviceplan.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure App Service plan.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | param kind string = '' 7 | param reserved bool = true 8 | param sku object 9 | 10 | resource appServicePlan 'Microsoft.Web/serverfarms@2023-12-01' = { 11 | name: name 12 | location: location 13 | tags: tags 14 | sku: sku 15 | kind: kind 16 | properties: { 17 | reserved: reserved 18 | } 19 | } 20 | 21 | output id string = appServicePlan.id 22 | output name string = appServicePlan.name 23 | -------------------------------------------------------------------------------- /infra/core/host/appservice-appsettings.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Updates app settings for an Azure App Service.' 2 | @description('The name of the app service resource within the current resource group scope') 3 | param name string 4 | 5 | @description('The app settings to be applied to the app service') 6 | @secure() 7 | param appSettings object 8 | 9 | resource appService 'Microsoft.Web/sites@2022-03-01' existing = { 10 | name: name 11 | } 12 | 13 | resource settings 'Microsoft.Web/sites/config@2022-03-01' = { 14 | name: 'appsettings' 15 | parent: appService 16 | properties: appSettings 17 | } 18 | -------------------------------------------------------------------------------- /infra/core/monitor/loganalytics.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a Log Analytics workspace.' 
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { 7 | name: name 8 | location: location 9 | tags: tags 10 | properties: any({ 11 | retentionInDays: 30 12 | features: { 13 | searchVersion: 1 14 | } 15 | sku: { 16 | name: 'PerGB2018' 17 | } 18 | }) 19 | } 20 | 21 | output id string = logAnalytics.id 22 | output name string = logAnalytics.name 23 | -------------------------------------------------------------------------------- /packages/webapp/assets/send.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webapp", 3 | "version": "1.0.0", 4 | "description": "Web app for the serverless AI Chat RAG sample", 5 | "private": true, 6 | "type": "module", 7 | "scripts": { 8 | "dev": "vite --port 8000 --host", 9 | "build": "vite build", 10 | "watch": "vite build --watch --minify false", 11 | "clean": "npx rimraf dist" 12 | }, 13 | "author": "Microsoft", 14 | "license": "MIT", 15 | "dependencies": { 16 | "@microsoft/ai-chat-protocol": "^1.0.0-beta.20240814.1", 17 | "lit": "^3.0.0" 18 | }, 19 | "devDependencies": { 20 | "vite": "^6.0.2" 21 | }, 22 | "files": [ 23 | "dist" 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /infra/core/database/cosmos/sql/cosmos-sql-role-assign.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a SQL role assignment under an Azure Cosmos DB account.' 2 | param accountName string 3 | 4 | param roleDefinitionId string 5 | param principalId string = '' 6 | 7 | resource role 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2022-05-15' = { 8 | parent: cosmos 9 | name: guid(roleDefinitionId, principalId, cosmos.id) 10 | properties: { 11 | principalId: principalId 12 | roleDefinitionId: roleDefinitionId 13 | scope: cosmos.id 14 | } 15 | } 16 | 17 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = { 18 | name: accountName 19 | } 20 | -------------------------------------------------------------------------------- /infra/core/security/role.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a role assignment for a service principal.' 
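// The assignment name below is a deterministic guid() over subscription, resource group, principal and role, which keeps redeployments idempotent.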
2 | param principalId string 3 | 4 | @allowed([ 5 | 'Device' 6 | 'ForeignGroup' 7 | 'Group' 8 | 'ServicePrincipal' 9 | 'User' 10 | ]) 11 | param principalType string = 'ServicePrincipal' 12 | param roleDefinitionId string 13 | 14 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 15 | name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId) 16 | properties: { 17 | principalId: principalId 18 | principalType: principalType 19 | roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /packages/webapp/vite.config.ts: -------------------------------------------------------------------------------- 1 | import process from 'node:process'; 2 | import { defineConfig } from 'vite'; 3 | 4 | // Expose environment variables to the client 5 | process.env.VITE_API_URL = process.env.API_URL ?? ''; 6 | console.log(`Using chat API base URL: "${process.env.VITE_API_URL}"`); 7 | 8 | export default defineConfig({ 9 | build: { 10 | outDir: './dist', 11 | emptyOutDir: true, 12 | sourcemap: true, 13 | rollupOptions: { 14 | output: { 15 | manualChunks(id) { 16 | if (id.includes('node_modules')) { 17 | return 'vendor'; 18 | } 19 | }, 20 | }, 21 | }, 22 | }, 23 | server: { 24 | proxy: { 25 | '/api': 'http://127.0.0.1:7071', 26 | }, 27 | }, 28 | }); 29 | -------------------------------------------------------------------------------- /packages/api/README.md: -------------------------------------------------------------------------------- 1 | # Azure Functions API 2 | 3 | This project uses [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) as a serverless API, and [LangChain.js](https://js.langchain.com/) to implement the AI capabilities. 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm start` 10 | 11 | This command will start the API in dev mode, and you will be able to access it through the URL `http://localhost:7071/api/`. 12 | 13 | You can use the `api.http` file to test the API using the [REST Client](https://marketplace.visualstudio.com/items?itemName=humao.rest-client) extension for Visual Studio Code. 14 | 15 | ### `npm run build` 16 | 17 | To build the API for production to the `dist` folder. 18 | -------------------------------------------------------------------------------- /packages/webapp/assets/delete.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/workflows/stale-bot.yaml: -------------------------------------------------------------------------------- 1 | name: Close stale issues and PRs 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | contents: write 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - uses: actions/stale@v9 15 | with: 16 | stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this issue will be closed.' 17 | stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed.' 18 | close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.' 
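          # days-before-close is not set here, so the 7-day delay mentioned in the close messages relies on the action's default (7); set it explicitly if you change the wording.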
19 | close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.' 20 | days-before-issue-stale: 60 21 | days-before-pr-stale: 60 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | > ## Please provide us with the following information: 6 | 7 | ### This issue is for a: (mark with an `x`) 8 | 9 | ``` 10 | - [ ] bug report -> please search issues before submitting 11 | - [ ] feature request 12 | - [ ] documentation issue or request 13 | - [ ] regression (a behavior that used to work and stopped in a new release) 14 | ``` 15 | 16 | ### Minimal steps to reproduce 17 | 18 | > 19 | 20 | ### Any log messages given by the failure 21 | 22 | > 23 | 24 | ### Expected/desired behavior 25 | 26 | > 27 | 28 | ### OS and Version? 29 | 30 | > Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?) 31 | 32 | ### Versions 33 | 34 | > 35 | 36 | ### Mention any other details that might be useful 37 | 38 | > --- 39 | > 40 | > Thanks! We'll be in touch soon. 41 | -------------------------------------------------------------------------------- /infra/core/database/cosmos/sql/cosmos-sql-role-def.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a SQL role definition under an Azure Cosmos DB account.' 2 | param accountName string 3 | 4 | resource roleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2022-08-15' = { 5 | parent: cosmos 6 | name: guid(cosmos.id, accountName, 'sql-role') 7 | properties: { 8 | assignableScopes: [ 9 | cosmos.id 10 | ] 11 | permissions: [ 12 | { 13 | dataActions: [ 14 | 'Microsoft.DocumentDB/databaseAccounts/readMetadata' 15 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/items/*' 16 | 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/*' 17 | ] 18 | notDataActions: [] 19 | } 20 | ] 21 | roleName: 'Reader Writer' 22 | type: 'CustomRole' 23 | } 24 | } 25 | 26 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = { 27 | name: accountName 28 | } 29 | 30 | output id string = roleDefinition.id 31 | -------------------------------------------------------------------------------- /packages/api/src/http-response.ts: -------------------------------------------------------------------------------- 1 | import { HttpResponseInit } from '@azure/functions'; 2 | 3 | export function badRequest(message: string): HttpResponseInit { 4 | return { 5 | status: 400, 6 | jsonBody: { 7 | error: message, 8 | }, 9 | }; 10 | } 11 | 12 | export function notFound(message: string): HttpResponseInit { 13 | return { 14 | status: 404, 15 | jsonBody: { 16 | error: message, 17 | }, 18 | }; 19 | } 20 | 21 | export function serviceUnavailable(message: string): HttpResponseInit { 22 | return { 23 | status: 503, 24 | jsonBody: { 25 | error: message, 26 | }, 27 | }; 28 | } 29 | 30 | export function ok(body?: object): HttpResponseInit { 31 | return body 32 | ? 
{ 33 | status: 200, 34 | jsonBody: body, 35 | } 36 | : { status: 204 }; 37 | } 38 | 39 | export function data(body: Uint8Array | AsyncIterable<Uint8Array>, headers: Record<string, string>): HttpResponseInit { 40 | return { 41 | status: 200, 42 | headers, 43 | body, 44 | }; 45 | } 46 | -------------------------------------------------------------------------------- /packages/webapp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "module": "esnext", 5 | "lib": ["esnext", "DOM", "DOM.Iterable"], 6 | "strict": true, 7 | "outDir": "./dist", 8 | "rootDir": "./src", 9 | "declaration": true, 10 | "declarationMap": true, 11 | "sourceMap": true, 12 | "inlineSources": true, 13 | "noUnusedLocals": true, 14 | "noUnusedParameters": true, 15 | "noImplicitReturns": true, 16 | "noFallthroughCasesInSwitch": true, 17 | "noImplicitAny": false, 18 | "noImplicitThis": true, 19 | "moduleResolution": "node", 20 | "allowSyntheticDefaultImports": true, 21 | "experimentalDecorators": true, 22 | "forceConsistentCasingInFileNames": true, 23 | "noImplicitOverride": true, 24 | "emitDeclarationOnly": true, 25 | "useDefineForClassFields": false, 26 | "plugins": [ 27 | { 28 | "name": "ts-lit-plugin", 29 | "strict": true 30 | } 31 | ] 32 | }, 33 | "include": ["src/**/*.ts"], 34 | "types": ["vite/client"] 35 | } 36 | -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json 2 | 3 | name: serverless-chat-langchainjs 4 | metadata: 5 | template: serverless-chat-langchainjs@1.0.0 6 | 7 | services: 8 | webapp: 9 | project: ./packages/webapp 10 | dist: dist 11 | language: ts 12 | host: staticwebapp 13 | hooks: 14 | predeploy: 15 | windows: 16 | shell: pwsh 17 | run: npm run build 18 | posix: 19 | shell: sh 20 | run: npm run build 21 | 22 | api: 23 | project: ./packages/api 24 | language: ts 25 | host: function 26 | 27 | hooks: 28 | postprovision: 29 | windows: 30 | shell: pwsh 31 | run: azd env get-values > packages/api/.env 32 | posix: 33 | shell: sh 34 | run: azd env get-values > packages/api/.env 35 | postup: 36 | windows: 37 | shell: pwsh 38 | run: node scripts/upload-documents.js "$env:UPLOAD_URL" 39 | posix: 40 | shell: sh 41 | run: node scripts/upload-documents.js "$UPLOAD_URL" 42 | -------------------------------------------------------------------------------- /.github/workflows/validate-infra.yaml: -------------------------------------------------------------------------------- 1 | name: Validate AZD template 2 | on: 3 | push: 4 | branches: [main] 5 | paths: 6 | - 'infra/**' 7 | pull_request: 8 | branches: [main] 9 | paths: 10 | - 'infra/**' 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | security-events: write 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Build Bicep for linting 22 | uses: azure/CLI@v2 23 | with: 24 | inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout 25 | 26 | - name: Run Microsoft Security DevOps Analysis 27 | uses: microsoft/security-devops-action@preview 28 | id: msdo 29 | continue-on-error: true 30 | with: 31 | tools: templateanalyzer 32 | 33 | - name: Upload alerts to Security tab 34 | if: github.repository_owner == 'Azure-Samples' 35 | uses: 
github/codeql-action/upload-sarif@v3 36 | with: 37 | sarif_file: ${{ steps.msdo.outputs.sarifFile }} 38 | -------------------------------------------------------------------------------- /packages/webapp/src/api.ts: -------------------------------------------------------------------------------- 1 | import { AIChatMessage, AIChatCompletionDelta, AIChatProtocolClient } from '@microsoft/ai-chat-protocol'; 2 | 3 | export const apiBaseUrl: string = import.meta.env.VITE_API_URL || ''; 4 | 5 | export type ChatRequestOptions = { 6 | messages: AIChatMessage[]; 7 | context?: Record<string, unknown>; 8 | chunkIntervalMs: number; 9 | apiUrl: string; 10 | }; 11 | 12 | export async function* getCompletion(options: ChatRequestOptions) { 13 | const apiUrl = options.apiUrl || apiBaseUrl; 14 | const client = new AIChatProtocolClient(`${apiUrl}/api/chats`); 15 | const result = await client.getStreamedCompletion(options.messages, { context: options.context }); 16 | 17 | for await (const response of result) { 18 | if (!response.delta) { 19 | continue; 20 | } 21 | 22 | yield new Promise<AIChatCompletionDelta>((resolve) => { 23 | setTimeout(() => { 24 | resolve(response); 25 | }, options.chunkIntervalMs); 26 | }); 27 | } 28 | } 29 | 30 | export function getCitationUrl(citation: string): string { 31 | return `${apiBaseUrl}/api/documents/${citation}`; 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /infra/core/monitor/applicationinsights.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Application Insights instance based on an existing Log Analytics workspace.' 
2 | param name string 3 | param dashboardName string = '' 4 | param location string = resourceGroup().location 5 | param tags object = {} 6 | param logAnalyticsWorkspaceId string 7 | 8 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 9 | name: name 10 | location: location 11 | tags: tags 12 | kind: 'web' 13 | properties: { 14 | Application_Type: 'web' 15 | WorkspaceResourceId: logAnalyticsWorkspaceId 16 | } 17 | } 18 | 19 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if (!empty(dashboardName)) { 20 | name: 'application-insights-dashboard' 21 | params: { 22 | name: dashboardName 23 | location: location 24 | applicationInsightsName: applicationInsights.name 25 | } 26 | } 27 | 28 | output connectionString string = applicationInsights.properties.ConnectionString 29 | output id string = applicationInsights.id 30 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey 31 | output name string = applicationInsights.name 32 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Purpose 2 | 3 | 4 | 5 | - ... 6 | 7 | ## Does this introduce a breaking change? 8 | 9 | 10 | 11 | ``` 12 | [ ] Yes 13 | [ ] No 14 | ``` 15 | 16 | ## Pull Request Type 17 | 18 | What kind of change does this Pull Request introduce? 19 | 20 | 21 | 22 | ``` 23 | [ ] Bugfix 24 | [ ] Feature 25 | [ ] Code style update (formatting, local variables) 26 | [ ] Refactoring (no functional changes, no api changes) 27 | [ ] Documentation content changes 28 | [ ] Other... Please describe: 29 | ``` 30 | 31 | ## How to Test 32 | 33 | - Get the code 34 | 35 | ``` 36 | git clone [repo-address] 37 | cd [repo-name] 38 | git checkout [branch-name] 39 | npm install 40 | ``` 41 | 42 | - Test the code 43 | 44 | 45 | ``` 46 | 47 | ``` 48 | 49 | ## What to Check 50 | 51 | Verify that the following are valid 52 | 53 | - ... 
54 | 55 | ## Other Information 56 | 57 | 58 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yaml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | 8 | jobs: 9 | build_test: 10 | strategy: 11 | matrix: 12 | platform: [ubuntu-latest, macos-latest, windows-latest] 13 | node-version: ['20', '22'] 14 | 15 | name: ${{ matrix.platform }} / Node.js v${{ matrix.node-version }} 16 | runs-on: ${{ matrix.platform }} 17 | steps: 18 | - run: git config --global core.autocrlf false # Preserve line endings 19 | - uses: actions/checkout@v4 20 | - name: Setup Node.js v${{ matrix.node-version }} 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | - name: Install dependencies 25 | run: npm ci 26 | - name: Build packages 27 | run: npm run build 28 | - name: Lint packages 29 | run: npm run lint 30 | - name: Test packages 31 | run: npm test --if-present 32 | 33 | build_test_all: 34 | if: always() 35 | runs-on: ubuntu-latest 36 | needs: build_test 37 | steps: 38 | - name: Check build matrix status 39 | if: ${{ needs.build_test.result != 'success' }} 40 | run: exit 1 41 | -------------------------------------------------------------------------------- /packages/webapp/assets/new-chat.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/webapp/assets/question.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/api/api.http: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # VS Code with REST Client extension is needed to use this file. 3 | # Download at: https://aka.ms/vscode/rest-client 4 | ################################################################## 5 | 6 | @api_host = http://localhost:7071 7 | 8 | ### Upload PDF Document 9 | POST {{api_host}}/api/documents 10 | Accept: */* 11 | Content-Type: multipart/form-data; boundary=Boundary 12 | 13 | --Boundary 14 | Content-Disposition: form-data; name="file"; filename="support.pdf" 15 | Content-Type: application/pdf 16 | 17 | < ../../data/support.pdf 18 | --Boundary-- 19 | 20 | ### Retrieve PDF document 21 | GET {{api_host}}/api/documents/support.pdf 22 | 23 | ### Chat with the bot 24 | POST {{api_host}}/api/chats/stream?userId=1 25 | Content-Type: application/json 26 | 27 | { 28 | "messages": [ 29 | { 30 | "content": "How to Search and Book Rentals?", 31 | "role": "user" 32 | } 33 | ], 34 | "context": { 35 | "sessionId": "123" 36 | } 37 | } 38 | 39 | ### Retrieve all chat sessions 40 | GET {{api_host}}/api/chats?userId=1 41 | 42 | ### Retrieve a chat session 43 | GET {{api_host}}/api/chats/123?userId=1 44 | 45 | ### Delete a chat session 46 | DELETE {{api_host}}/api/chats/123?userId=1 47 | -------------------------------------------------------------------------------- /docs/cost.md: -------------------------------------------------------------------------------- 1 | ## Cost estimation 2 | 3 | Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage. 
4 | However, you can use the [Azure pricing calculator](https://azure.com/e/aa7deadafa0f4980a91308de010299bc) for the resources below to get an estimate. 5 | 6 | - Azure Functions: Consumption plan, Free for the first 1M executions. Pricing per execution and memory used. [Pricing](https://azure.microsoft.com/pricing/details/functions/) 7 | - Azure Static Web Apps: Free tier, 100GB bandwidth. Pricing per GB served. [Pricing](https://azure.microsoft.com/pricing/details/app-service/static/) 8 | - Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/) 9 | - Azure Cosmos DB: Serverless tier. Pricing per request unit (RU). [Pricing](https://azure.microsoft.com/pricing/details/cosmos-db/autoscale-provisioned/) 10 | - Azure Blob Storage: Standard tier with LRS. Pricing per GB stored and data transfer. [Pricing](https://azure.microsoft.com/pricing/details/storage/blobs/) 11 | 12 | ⚠️ To avoid unnecessary costs, remember to take down your app if it's no longer in use, 13 | either by deleting the resource group in the Portal or running `azd down --purge`. 14 | -------------------------------------------------------------------------------- /packages/api/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "api", 3 | "version": "1.0.0", 4 | "description": "Azure Functions API for the serverless AI Chat RAG sample", 5 | "main": "dist/src/functions/*.js", 6 | "scripts": { 7 | "build": "tsc", 8 | "watch": "tsc -w", 9 | "clean": "rimraf dist", 10 | "prestart": "npm run clean && npm run build", 11 | "start:host": "func start", 12 | "start": "concurrently npm:start:host npm:watch --raw --kill-others" 13 | }, 14 | "author": "Microsoft", 15 | "license": "MIT", 16 | "dependencies": { 17 | "@azure/functions": "^4.0.0", 18 | "@azure/identity": "^4.2.0", 19 | "@azure/search-documents": "^12.0.0", 20 | "@azure/storage-blob": "^12.17.0", 21 | "@langchain/azure-cosmosdb": "^0.2.2", 22 | "@langchain/community": "^0.3.15", 23 | "@langchain/core": "^0.3.18", 24 | "@langchain/ollama": "^0.2.0", 25 | "@langchain/openai": "^0.5.10", 26 | "@langchain/textsplitters": "^0.1.0", 27 | "@microsoft/ai-chat-protocol": "^1.0.0-beta.20240814.1", 28 | "dotenv": "^16.4.5", 29 | "faiss-node": "^0.5.1", 30 | "langchain": "^0.3.6", 31 | "pdf-parse": "^1.1.1", 32 | "uuid": "^11.0.3" 33 | }, 34 | "devDependencies": { 35 | "@types/node": "^20.x", 36 | "azure-functions-core-tools": "^4.0.5611", 37 | "typescript": "^5.4.4" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /infra/core/monitor/monitoring.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Application Insights instance and a Log Analytics workspace.' 
2 | param logAnalyticsName string 3 | param applicationInsightsName string 4 | param applicationInsightsDashboardName string = '' 5 | param location string = resourceGroup().location 6 | param tags object = {} 7 | 8 | module logAnalytics 'loganalytics.bicep' = { 9 | name: 'loganalytics' 10 | params: { 11 | name: logAnalyticsName 12 | location: location 13 | tags: tags 14 | } 15 | } 16 | 17 | module applicationInsights 'applicationinsights.bicep' = { 18 | name: 'applicationinsights' 19 | params: { 20 | name: applicationInsightsName 21 | location: location 22 | tags: tags 23 | dashboardName: applicationInsightsDashboardName 24 | logAnalyticsWorkspaceId: logAnalytics.outputs.id 25 | } 26 | } 27 | 28 | output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString 29 | output applicationInsightsId string = applicationInsights.outputs.id 30 | output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey 31 | output applicationInsightsName string = applicationInsights.outputs.name 32 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id 33 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name 34 | -------------------------------------------------------------------------------- /packages/api/src/security.ts: -------------------------------------------------------------------------------- 1 | import { HttpRequest } from '@azure/functions'; 2 | import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; 3 | 4 | const azureOpenAiScope = 'https://cognitiveservices.azure.com/.default'; 5 | 6 | let credentials: DefaultAzureCredential | undefined; 7 | 8 | export function getCredentials(): DefaultAzureCredential { 9 | // Use the current user identity to authenticate. 10 | // No secrets needed, it uses `az login` or `azd auth login` locally, 11 | // and managed identity when deployed on Azure. 12 | credentials ||= new DefaultAzureCredential(); 13 | return credentials; 14 | } 15 | 16 | export function getAzureOpenAiTokenProvider() { 17 | return getBearerTokenProvider(getCredentials(), azureOpenAiScope); 18 | } 19 | 20 | export function getUserId(request: HttpRequest, body?: any): string | undefined { 21 | let userId: string | undefined; 22 | 23 | // Get the user ID from Azure easy auth if it's available 24 | try { 25 | const token = Buffer.from(request.headers.get('x-ms-client-principal') ?? '', 'base64').toString('ascii'); 26 | const infos = token && JSON.parse(token); 27 | userId = infos?.userId; 28 | } catch {} 29 | 30 | // Get the user ID from the request as a fallback 31 | userId ??= body?.context?.userId ?? request.query.get('userId') ?? 
undefined; 32 | 33 | return userId; 34 | } 35 | -------------------------------------------------------------------------------- /infra/main.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentName": { 6 | "value": "${AZURE_ENV_NAME}" 7 | }, 8 | "resourceGroupName": { 9 | "value": "${AZURE_RESOURCE_GROUP}" 10 | }, 11 | "location": { 12 | "value": "${AZURE_LOCATION}" 13 | }, 14 | "principalId": { 15 | "value": "${AZURE_PRINCIPAL_ID}" 16 | }, 17 | "openAiLocation": { 18 | "value": "${AZURE_OPENAI_LOCATION=eastus2}" 19 | }, 20 | "openAiApiVersion": { 21 | "value": "${AZURE_OPENAI_API_VERSION=2024-02-01}" 22 | }, 23 | "chatModelName": { 24 | "value": "${AZURE_OPENAI_API_MODEL=gpt-4o-mini}" 25 | }, 26 | "chatModelVersion": { 27 | "value": "${AZURE_OPENAI_API_MODEL_VERSION=2024-07-18}" 28 | }, 29 | "embeddingsModelName": { 30 | "value": "${AZURE_OPENAI_API_EMBEDDINGS_MODEL=text-embedding-ada-002}" 31 | }, 32 | "embeddingsModelVersion": { 33 | "value": "${AZURE_OPENAI_API_EMBEDDINGS_MODEL_VERSION=2}" 34 | }, 35 | "webappLocation": { 36 | "value": "${AZURE_WEBAPP_LOCATION=eastus2}" 37 | }, 38 | "useVnet": { 39 | "value": "${USE_VNET=false}" 40 | }, 41 | "isContinuousDeployment": { 42 | "value": "${CI=false}" 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "func", 6 | "label": "func: host start", 7 | "command": "host start", 8 | "problemMatcher": "$func-node-watch", 9 | "isBackground": true, 10 | "dependsOn": "npm build (functions)", 11 | "options": { 12 | "cwd": "${workspaceFolder}/packages/api" 13 | } 14 | }, 15 | { 16 | "type": "shell", 17 | "label": "npm build (functions)", 18 | "command": "npm run build", 19 | "dependsOn": "npm clean (functions)", 20 | "problemMatcher": "$tsc", 21 | "options": { 22 | "cwd": "${workspaceFolder}/packages/api" 23 | } 24 | }, 25 | { 26 | "type": "shell", 27 | "label": "npm install (functions)", 28 | "command": "npm install", 29 | "options": { 30 | "cwd": "${workspaceFolder}/packages/api" 31 | } 32 | }, 33 | { 34 | "type": "shell", 35 | "label": "npm prune (functions)", 36 | "command": "npm prune --production", 37 | "dependsOn": "npm build (functions)", 38 | "problemMatcher": [], 39 | "options": { 40 | "cwd": "${workspaceFolder}/packages/api" 41 | } 42 | }, 43 | { 44 | "type": "shell", 45 | "label": "npm clean (functions)", 46 | "command": "npm run clean", 47 | "dependsOn": "npm install (functions)", 48 | "options": { 49 | "cwd": "${workspaceFolder}/packages/api" 50 | } 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /docs/enhance-security.md: -------------------------------------------------------------------------------- 1 | # Enhance security 2 | 3 | To achieve enterprise-grade security, we've ensured you can leverage the features below through an opt-in flag: 4 | 5 | - **Deploy in a [virtual network](https://learn.microsoft.com/azure/virtual-network/virtual-networks-overview)**, to restrict access to the resources including the Azure Functions API and the Azure Storage where the documents are stored. 
6 | 
 7 | - **Leverage [Azure Entra managed identity](https://learn.microsoft.com/entra/identity/managed-identities-azure-resources/overview)** to disable all local authentication methods (i.e. API keys) and rely on [Role-based Access Control (RBAC)](https://learn.microsoft.com/azure/role-based-access-control/overview). 8 | 9 | You can enable these features when deploying this sample by following these steps: 10 | 11 | 1. Create a new environment for your deployment (you cannot update an existing one): 12 | ```bash 13 | azd env create my-secure-env 14 | ``` 15 | 2. Enable the virtual network feature and disable local authentication: 16 | ```bash 17 | azd env set USE_VNET true 18 | ``` 19 | 3. Deploy the sample to the new environment: 20 | ```bash 21 | azd up 22 | ``` 23 | 24 | Note that enabling the virtual network will incur additional costs, as it requires deploying extra resources and switching to paid plans for Azure Functions and Azure Static Web Apps. 25 | 26 | > [!IMPORTANT] 27 | > When VNET is enabled, you will lose the ability to run the sample locally while connected to Azure resources. 28 | > You can always fall back to using a local AI model and database for development purposes, by deleting the `api/.env` file. 29 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node 3 | { 4 | "name": "Node.js", 5 | 6 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 7 | "image": "mcr.microsoft.com/devcontainers/javascript-node:20-bullseye", 8 | 9 | // Features to add to the dev container. More info: https://containers.dev/features. 10 | "features": { 11 | "ghcr.io/devcontainers/features/node:1": { 12 | "version": "20" 13 | }, 14 | "ghcr.io/devcontainers/features/azure-cli:1": { 15 | "version": "latest", 16 | "installBicep": true 17 | }, 18 | "ghcr.io/devcontainers/features/github-cli:1": {}, 19 | "ghcr.io/devcontainers/features/powershell:1": {}, 20 | "ghcr.io/azure/azure-dev/azd:latest": {} 21 | }, 22 | 23 | // Configure tool-specific properties. 24 | "customizations": { 25 | "vscode": { 26 | "extensions": [ 27 | "ms-azuretools.azure-dev", 28 | "ms-azuretools.vscode-bicep", 29 | "esbenp.prettier-vscode", 30 | "humao.rest-client", 31 | "runem.lit-plugin" 32 | ] 33 | } 34 | }, 35 | 36 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 37 | "forwardPorts": [8000, 7071], 38 | 39 | // Use 'postCreateCommand' to run commands after the container is created. 40 | "postCreateCommand": "npm install && npm install -g fuzz-run" 41 | 42 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 43 | // "remoteUser": "root" 44 | } 45 | -------------------------------------------------------------------------------- /scripts/upload-documents.js: -------------------------------------------------------------------------------- 1 | import fs from 'node:fs/promises'; 2 | import path from 'node:path'; 3 | 4 | // This script uploads all PDF files from the 'data' folder to the ingestion API. 
5 | // It is the Node.js equivalent of this bash script: 6 | // ``` 7 | // for file in data/*.pdf; do 8 | // curl -X POST -F "file=@$file" <api_url>/api/documents 9 | // done 10 | // ``` 11 | async function uploadDocuments(apiUrl, dataFolder) { 12 | try { 13 | const uploadUrl = `${apiUrl}/api/documents`; 14 | const files = await fs.readdir(dataFolder); 15 | console.log(`Uploading documents to: ${uploadUrl}`); 16 | 17 | /* eslint-disable no-await-in-loop */ 18 | for (const file of files) { 19 | if (path.extname(file).toLowerCase() === '.pdf') { 20 | const data = await fs.readFile(path.join(dataFolder, file)); 21 | const blobParts = new Array(data); 22 | const formData = new FormData(); 23 | formData.append('file', new File(blobParts, file)); 24 | 25 | const response = await fetch(uploadUrl, { 26 | method: 'post', 27 | body: formData, 28 | }); 29 | 30 | const responseData = await response.json(); 31 | if (response.ok) { 32 | console.log(`${file}: ${responseData.message}`); 33 | } else { 34 | throw new Error(responseData.error); 35 | } 36 | } 37 | } 38 | /* eslint-enable no-await-in-loop */ 39 | } catch (error) { 40 | console.error(`Could not upload documents: ${error.message}`); 41 | process.exitCode = -1; 42 | } 43 | } 44 | 45 | const apiUrl = process.argv[2]; 46 | if (apiUrl) { 47 | await uploadDocuments(apiUrl, 'data'); 48 | } else { 49 | console.log('Usage: node upload-documents.js <api_url>'); 50 | process.exitCode = -1; 51 | } 52 | -------------------------------------------------------------------------------- /infra/core/ai/cognitiveservices.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Cognitive Services instance.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | @description('The custom subdomain name used to access the API. Defaults to the value of the name parameter.') 6 | param customSubDomainName string = name 7 | param disableLocalAuth bool = false 8 | param deployments array = [] 9 | param kind string = 'OpenAI' 10 | 11 | @allowed([ 'Enabled', 'Disabled' ]) 12 | param publicNetworkAccess string = 'Enabled' 13 | param sku object = { 14 | name: 'S0' 15 | } 16 | 17 | param allowedIpRules array = [] 18 | param networkAcls object = empty(allowedIpRules) ? { 19 | defaultAction: 'Allow' 20 | } : { 21 | ipRules: allowedIpRules 22 | defaultAction: 'Deny' 23 | } 24 | 25 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { 26 | name: name 27 | location: location 28 | tags: tags 29 | kind: kind 30 | properties: { 31 | customSubDomainName: customSubDomainName 32 | publicNetworkAccess: publicNetworkAccess 33 | networkAcls: networkAcls 34 | disableLocalAuth: disableLocalAuth 35 | } 36 | sku: sku 37 | } 38 | 39 | @batchSize(1) 40 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { 41 | parent: account 42 | name: deployment.name 43 | properties: { 44 | model: deployment.model 45 | raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null 46 | } 47 | sku: contains(deployment, 'sku') ?
deployment.sku : { 48 | name: 'Standard' 49 | capacity: 20 50 | } 51 | }] 52 | 53 | output endpoint string = account.properties.endpoint 54 | output endpoints object = account.properties.endpoints 55 | output id string = account.id 56 | output name string = account.name 57 | -------------------------------------------------------------------------------- /docs/troubleshooting.md: -------------------------------------------------------------------------------- 1 | ## Troubleshooting 2 | 3 | Here are the most common failure scenarios and solutions: 4 | 5 | 1. The subscription (`AZURE_SUBSCRIPTION_ID`) doesn't have access to the Azure OpenAI service. Please ensure `AZURE_SUBSCRIPTION_ID` matches the ID specified in the [OpenAI access request process](https://aka.ms/oai/access). 6 | 7 | 1. You're attempting to create resources in regions not enabled for Azure OpenAI (e.g. East US 2 instead of East US), or where the model you're trying to use isn't enabled. See [this matrix of model availability](https://aka.ms/oai/models). 8 | 9 | 1. You've exceeded a quota, most often the number of resources per region. See [this article on quotas and limits](https://aka.ms/oai/quotas). If this happens, you have a few options: 10 | 11 | - Delete other unused resources in the region you're trying to deploy to. 12 | - Deploy to a different region. 13 | - Try to use a different model or adjust the capacity in `infra/main.bicep` (see [the FAQ](faq.md) and the sketch at the end of this page). 14 | - Request a quota increase. 15 | 16 | 1. You're getting "same resource name not allowed" conflicts. That's likely because you've run the sample multiple times and deleted the resources each time without purging them. Azure keeps deleted resources for 48 hours unless you purge them from soft delete. See [this article on purging resources](https://learn.microsoft.com/azure/ai-services/recover-purge-resources?tabs=azure-portal#purge-a-deleted-resource). 17 | 18 | 1. After running `azd up` and visiting the website, the website takes a long time to load and answer the first request. Because we're using serverless technologies, the first request to the Azure Functions API might take a few seconds: the service is scaled to zero when not in use to optimize costs, and it needs a few seconds to start up when first accessed. This delay can be removed by using the [Azure Functions Premium plan](https://learn.microsoft.com/azure/azure-functions/functions-premium-plan).
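If you choose to adjust the capacity, the change goes in the OpenAI deployment definitions. Below is a hedged sketch of what such an entry can look like: the shape matches the `deployments` array consumed by `infra/core/ai/cognitiveservices.bicep` and the `chatModelName`/`chatModelVersion` parameters exist in `infra/main.parameters.json`, but `chatDeploymentName` and the exact wiring in `infra/main.bicep` are assumptions to verify against the actual file.

```bicep
// Hypothetical excerpt: check infra/main.bicep for the actual deployment definitions.
{
  name: chatDeploymentName
  model: {
    format: 'OpenAI'
    name: chatModelName       // e.g. 'gpt-4o-mini'
    version: chatModelVersion // e.g. '2024-07-18'
  }
  sku: {
    name: 'Standard'
    // Capacity is in units of 1,000 tokens per minute for Standard deployments;
    // the cognitiveservices.bicep module defaults to 20. Lower it to fit your quota.
    capacity: 10
  }
}
```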
19 | -------------------------------------------------------------------------------- /packages/api/src/functions/chats-delete.ts: -------------------------------------------------------------------------------- 1 | import process from 'node:process'; 2 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions'; 3 | import { AzureCosmsosDBNoSQLChatMessageHistory } from '@langchain/azure-cosmosdb'; 4 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system'; 5 | import 'dotenv/config'; 6 | import { badRequest, ok, notFound } from '../http-response.js'; 7 | import { getCredentials, getUserId } from '../security.js'; 8 | 9 | async function deleteChats(request: HttpRequest, context: InvocationContext): Promise { 10 | const azureCosmosDbEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT; 11 | const { sessionId } = request.params; 12 | const userId = getUserId(request); 13 | 14 | if (!userId) { 15 | return badRequest('Invalid or missing userId in the request'); 16 | } 17 | 18 | if (!sessionId) { 19 | return badRequest('Invalid or missing sessionId in the request'); 20 | } 21 | 22 | try { 23 | let chatHistory; 24 | 25 | if (azureCosmosDbEndpoint) { 26 | const credentials = getCredentials(); 27 | chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({ 28 | sessionId, 29 | userId, 30 | credentials, 31 | }); 32 | } else { 33 | // If no environment variables are set, it means we are running locally 34 | context.log('No Azure CosmosDB endpoint set, using local file'); 35 | 36 | chatHistory = new FileSystemChatMessageHistory({ 37 | sessionId, 38 | userId, 39 | }); 40 | } 41 | 42 | await chatHistory.clear(); 43 | return ok(); 44 | } catch (_error: unknown) { 45 | const error = _error as Error; 46 | context.error(`Error when processing chats-delete request: ${error.message}`); 47 | 48 | return notFound('Session not found'); 49 | } 50 | } 51 | 52 | app.http('chats-delete', { 53 | route: 'chats/{sessionId}', 54 | methods: ['DELETE'], 55 | authLevel: 'anonymous', 56 | handler: deleteChats, 57 | }); 58 | -------------------------------------------------------------------------------- /packages/webapp/src/message-parser.ts: -------------------------------------------------------------------------------- 1 | import { type HTMLTemplateResult, html, nothing } from 'lit'; 2 | import { AIChatMessage } from '@microsoft/ai-chat-protocol'; 3 | 4 | export type ParsedMessage = { 5 | html: HTMLTemplateResult; 6 | citations: string[]; 7 | followupQuestions: string[]; 8 | role: string; 9 | context?: object; 10 | }; 11 | 12 | export function parseMessageIntoHtml( 13 | message: AIChatMessage, 14 | renderCitationReference: (citation: string, index: number) => HTMLTemplateResult, 15 | ): ParsedMessage { 16 | if (message.role === 'user') { 17 | return { 18 | html: html`${message.content}`, 19 | citations: [], 20 | followupQuestions: [], 21 | role: message.role, 22 | context: message.context, 23 | }; 24 | } 25 | 26 | const citations: string[] = []; 27 | const followupQuestions: string[] = []; 28 | 29 | // Extract any follow-up questions that might be in the message 30 | const text = message.content 31 | .replaceAll(/<<([^>]+)>>/g, (_match, content: string) => { 32 | followupQuestions.push(content); 33 | return ''; 34 | }) 35 | .split('<<')[0] // Truncate incomplete questions 36 | .trim(); 37 | 38 | // Extract any citations that might be in the message 39 | const parts = text.split(/\[([^\]]+)]/g); 40 | const result = html`${parts.map((part, index) => { 
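// Because the split regex has a capture group, `parts` alternates between
// plain text (even indices) and the captured citation names (odd indices).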
41 | if (index % 2 === 0) { 42 | return html`${part}`; 43 | } 44 | 45 | if (index + 1 < parts.length) { 46 | // Handle only completed citations 47 | let citationIndex = citations.indexOf(part); 48 | if (citationIndex === -1) { 49 | citations.push(part); 50 | citationIndex = citations.length; 51 | } else { 52 | citationIndex++; 53 | } 54 | 55 | return renderCitationReference(part, citationIndex); 56 | } 57 | 58 | return nothing; 59 | })}`; 60 | 61 | return { 62 | html: result, 63 | citations, 64 | followupQuestions, 65 | role: message.role, 66 | context: message.context, 67 | }; 68 | } 69 | -------------------------------------------------------------------------------- /packages/api/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | 24 | # nyc test coverage 25 | .nyc_output 26 | 27 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 28 | .grunt 29 | 30 | # Bower dependency directory (https://bower.io/) 31 | bower_components 32 | 33 | # node-waf configuration 34 | .lock-wscript 35 | 36 | # Compiled binary addons (https://nodejs.org/api/addons.html) 37 | build/Release 38 | 39 | # Dependency directories 40 | node_modules/ 41 | jspm_packages/ 42 | 43 | # TypeScript v1 declaration files 44 | typings/ 45 | 46 | # Optional npm cache directory 47 | .npm 48 | 49 | # Optional eslint cache 50 | .eslintcache 51 | 52 | # Optional REPL history 53 | .node_repl_history 54 | 55 | # Output of 'npm pack' 56 | *.tgz 57 | 58 | # Yarn Integrity file 59 | .yarn-integrity 60 | 61 | # dotenv environment variables file 62 | .env 63 | .env.test 64 | 65 | # parcel-bundler cache (https://parceljs.org/) 66 | .cache 67 | 68 | # next.js build output 69 | .next 70 | 71 | # nuxt.js build output 72 | .nuxt 73 | 74 | # vuepress build output 75 | .vuepress/dist 76 | 77 | # Serverless directories 78 | .serverless/ 79 | 80 | # FuseBox cache 81 | .fusebox/ 82 | 83 | # DynamoDB Local files 84 | .dynamodb/ 85 | 86 | # TypeScript output 87 | dist 88 | out 89 | 90 | # Azure Functions artifacts 91 | bin 92 | obj 93 | appsettings.json 94 | local.settings.json 95 | 96 | # Azurite artifacts 97 | __blobstorage__ 98 | __queuestorage__ 99 | __azurite_db*__.json 100 | 101 | # Faiss folder 102 | .faiss/ 103 | 104 | # Chat history 105 | .history/ 106 | -------------------------------------------------------------------------------- /infra/app/api.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param appServicePlanId string 6 | param storageAccountName string 7 | param virtualNetworkSubnetId string 8 | param applicationInsightsName string 9 | param allowedOrigins array 10 | param appSettings object 11 | param staticWebAppName string = '' 12 | 13 | var useVnet = !empty(virtualNetworkSubnetId) 14 | var finalApi = useVnet ? 
apiFlex : api 15 | 16 | module apiFlex '../core/host/functions-flex.bicep' = if (useVnet) { 17 | name: 'api-flex' 18 | scope: resourceGroup() 19 | params: { 20 | name: name 21 | location: location 22 | tags: tags 23 | allowedOrigins: allowedOrigins 24 | alwaysOn: false 25 | runtimeName: 'node' 26 | runtimeVersion: '20' 27 | appServicePlanId: appServicePlanId 28 | storageAccountName: storageAccountName 29 | applicationInsightsName: applicationInsightsName 30 | virtualNetworkSubnetId: virtualNetworkSubnetId 31 | appSettings: appSettings 32 | } 33 | } 34 | 35 | module api '../core/host/functions.bicep' = if (!useVnet) { 36 | name: 'api-consumption' 37 | scope: resourceGroup() 38 | params: { 39 | name: name 40 | location: location 41 | tags: tags 42 | allowedOrigins: allowedOrigins 43 | alwaysOn: false 44 | runtimeName: 'node' 45 | runtimeVersion: '20' 46 | appServicePlanId: appServicePlanId 47 | storageAccountName: storageAccountName 48 | applicationInsightsName: applicationInsightsName 49 | managedIdentity: true 50 | appSettings: appSettings 51 | } 52 | } 53 | 54 | // Link the Function App to the Static Web App 55 | module linkedBackend './linked-backend.bicep' = if (useVnet) { 56 | name: 'linkedbackend' 57 | scope: resourceGroup() 58 | params: { 59 | staticWebAppName: staticWebAppName 60 | backendResourceId: finalApi.outputs.id 61 | backendLocation: location 62 | } 63 | } 64 | 65 | output identityPrincipalId string = finalApi.outputs.identityPrincipalId 66 | output name string = finalApi.outputs.name 67 | output uri string = finalApi.outputs.uri 68 | -------------------------------------------------------------------------------- /infra/app/vnet.bicep: -------------------------------------------------------------------------------- 1 | @description('Specifies the name of the virtual network.') 2 | param name string 3 | 4 | @description('Specifies the location.') 5 | param location string = resourceGroup().location 6 | 7 | @description('Specifies the name of the subnet for Function App virtual network integration.') 8 | param appSubnetName string = 'app' 9 | 10 | param tags object = {} 11 | 12 | resource virtualNetwork 'Microsoft.Network/virtualNetworks@2023-05-01' = { 13 | name: name 14 | location: location 15 | tags: tags 16 | properties: { 17 | addressSpace: { 18 | addressPrefixes: [ 19 | '10.0.0.0/16' 20 | ] 21 | } 22 | encryption: { 23 | enabled: false 24 | enforcement: 'AllowUnencrypted' 25 | } 26 | subnets: [ 27 | { 28 | name: appSubnetName 29 | id: resourceId('Microsoft.Network/virtualNetworks/subnets', name, 'app') 30 | properties: { 31 | addressPrefixes: [ 32 | '10.0.1.0/24' 33 | ] 34 | delegations: [ 35 | { 36 | name: 'delegation' 37 | id: '${resourceId('Microsoft.Network/virtualNetworks/subnets', name, 'app')}/delegations/delegation' 38 | properties: { 39 | //Microsoft.App/environments is the correct delegation for Flex Consumption VNet integration 40 | serviceName: 'Microsoft.App/environments' 41 | } 42 | type: 'Microsoft.Network/virtualNetworks/subnets/delegations' 43 | } 44 | ] 45 | serviceEndpoints: [ 46 | { 47 | service: 'Microsoft.Storage' 48 | locations: [ 49 | resourceGroup().location 50 | ] 51 | } 52 | ] 53 | privateEndpointNetworkPolicies: 'Disabled' 54 | privateLinkServiceNetworkPolicies: 'Enabled' 55 | } 56 | type: 'Microsoft.Network/virtualNetworks/subnets' 57 | } 58 | ] 59 | virtualNetworkPeerings: [] 60 | enableDdosProtection: false 61 | } 62 | } 63 | 64 | output appSubnetName string = virtualNetwork.properties.subnets[0].name 65 | output appSubnetID string = 
virtualNetwork.properties.subnets[0].id 66 | -------------------------------------------------------------------------------- /packages/api/src/functions/chats-get.ts: -------------------------------------------------------------------------------- 1 | import process from 'node:process'; 2 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions'; 3 | import { AzureCosmsosDBNoSQLChatMessageHistory } from '@langchain/azure-cosmosdb'; 4 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system'; 5 | import 'dotenv/config'; 6 | import { badRequest, ok, notFound } from '../http-response.js'; 7 | import { getCredentials, getUserId } from '../security.js'; 8 | 9 | async function getChats(request: HttpRequest, context: InvocationContext): Promise { 10 | const azureCosmosDbEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT; 11 | const { sessionId } = request.params; 12 | const userId = getUserId(request); 13 | 14 | if (!userId) { 15 | return badRequest('Invalid or missing userId in the request'); 16 | } 17 | 18 | try { 19 | let chatHistory; 20 | 21 | if (azureCosmosDbEndpoint) { 22 | const credentials = getCredentials(); 23 | chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({ 24 | sessionId, 25 | userId, 26 | credentials, 27 | }); 28 | } else { 29 | // If no environment variables are set, it means we are running locally 30 | context.log('No Azure CosmosDB endpoint set, using local file'); 31 | 32 | chatHistory = new FileSystemChatMessageHistory({ 33 | sessionId, 34 | userId, 35 | }); 36 | } 37 | 38 | if (sessionId) { 39 | const messages = await chatHistory.getMessages(); 40 | const chatMessages = messages.map((message) => ({ 41 | role: message.getType() === 'human' ? 'user' : 'assistant', 42 | content: message.content, 43 | })); 44 | return ok(chatMessages); 45 | } 46 | 47 | const sessions = await chatHistory.getAllSessions(); 48 | const chatSessions = sessions.map((session) => ({ 49 | id: session.id, 50 | title: session.context?.title, 51 | })); 52 | return ok(chatSessions); 53 | } catch (_error: unknown) { 54 | const error = _error as Error; 55 | context.error(`Error when processing chats-get request: ${error.message}`); 56 | 57 | return notFound('Session not found'); 58 | } 59 | } 60 | 61 | app.http('chats-get', { 62 | route: 'chats/{sessionId?}', 63 | methods: ['GET'], 64 | authLevel: 'anonymous', 65 | handler: getChats, 66 | }); 67 | -------------------------------------------------------------------------------- /packages/api/src/functions/documents-get.ts: -------------------------------------------------------------------------------- 1 | import process from 'node:process'; 2 | import fs from 'node:fs/promises'; 3 | import { join } from 'node:path'; 4 | import { finished } from 'node:stream/promises'; 5 | import { HttpRequest, HttpResponseInit, InvocationContext, app } from '@azure/functions'; 6 | import { BlobServiceClient } from '@azure/storage-blob'; 7 | import 'dotenv/config'; 8 | import { data, notFound } from '../http-response.js'; 9 | import { getCredentials } from '../security.js'; 10 | 11 | async function getDocument(request: HttpRequest, context: InvocationContext): Promise { 12 | const storageUrl = process.env.AZURE_STORAGE_URL; 13 | const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME; 14 | const { fileName } = request.params; 15 | 16 | try { 17 | let fileData: Uint8Array; 18 | 19 | if (storageUrl && containerName) { 20 | // Retrieve the file from Azure Blob Storage 21 | 
context.log(`Reading blob from: "${containerName}/${fileName}"`); 22 | 23 | const credentials = getCredentials(); 24 | const blobServiceClient = new BlobServiceClient(storageUrl, credentials); 25 | const containerClient = blobServiceClient.getContainerClient(containerName); 26 | const response = await containerClient.getBlobClient(fileName).download(); 27 | 28 | fileData = await streamToBuffer(response.readableStreamBody!); 29 | } else { 30 | // If no environment variables are set, it means we are running locally 31 | context.log(`Reading file from local file system: "data/${fileName}"`); 32 | const filePath = join(__dirname, '../../../../../data', fileName); 33 | 34 | fileData = await fs.readFile(filePath); 35 | } 36 | 37 | return data(fileData, { 'content-type': 'application/pdf' }); 38 | } catch (_error: unknown) { 39 | const error = _error as Error; 40 | context.error(`Error when processing document-get request: ${error.message}`); 41 | 42 | return notFound('Document not found'); 43 | } 44 | } 45 | 46 | async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise { 47 | const chunks: Uint8Array[] = []; 48 | readableStream.on('data', (data) => { 49 | chunks.push(Buffer.from(data)); 50 | }); 51 | await finished(readableStream); 52 | return Buffer.concat(chunks); 53 | } 54 | 55 | app.http('documents-get', { 56 | route: 'documents/{fileName}', 57 | methods: ['GET'], 58 | authLevel: 'anonymous', 59 | handler: getDocument, 60 | }); 61 | -------------------------------------------------------------------------------- /.github/workflows/azure-dev.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy on Azure 2 | on: 3 | workflow_dispatch: 4 | push: 5 | # Run when commits are pushed to mainline branch (main) 6 | # Set this to the mainline branch you are using 7 | branches: [main] 8 | 9 | # GitHub Actions workflow to deploy to Azure using azd 10 | # To configure required secrets for connecting to Azure, simply run `azd pipeline config` 11 | 12 | # Set up permissions for deploying with secretless Azure federated credentials 13 | # https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication 14 | permissions: 15 | id-token: write 16 | contents: read 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-latest 21 | env: 22 | AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }} 23 | AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }} 24 | AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }} 25 | AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }} 26 | steps: 27 | - name: Checkout 28 | uses: actions/checkout@v4 29 | 30 | - name: Install azd 31 | uses: Azure/setup-azd@v2 32 | 33 | - name: Install Nodejs 34 | uses: actions/setup-node@v4 35 | with: 36 | node-version: 20 37 | 38 | - name: Log in with Azure (Federated Credentials) 39 | if: ${{ env.AZURE_CLIENT_ID != '' }} 40 | run: | 41 | azd auth login ` 42 | --client-id "$Env:AZURE_CLIENT_ID" ` 43 | --federated-credential-provider "github" ` 44 | --tenant-id "$Env:AZURE_TENANT_ID" 45 | shell: pwsh 46 | 47 | - name: Log in with Azure (Client Credentials) 48 | if: ${{ env.AZURE_CREDENTIALS != '' }} 49 | run: | 50 | $info = $Env:AZURE_CREDENTIALS | ConvertFrom-Json -AsHashtable; 51 | Write-Host "::add-mask::$($info.clientSecret)" 52 | 53 | azd auth login ` 54 | --client-id "$($info.clientId)" ` 55 | --client-secret "$($info.clientSecret)" ` 56 | --tenant-id "$($info.tenantId)" 57 | shell: pwsh 58 | env: 59 | 
AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }} 60 | 61 | - name: Provision and deploy application 62 | run: azd up --no-prompt 63 | env: 64 | AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }} 65 | AZURE_LOCATION: ${{ vars.AZURE_LOCATION }} 66 | AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }} 67 | -------------------------------------------------------------------------------- /packages/webapp/index.html: -------------------------------------------------------------------------------- [The HTML markup of this file was stripped during text extraction; only the page title "AI Chat with Enterprise Data" is recoverable. Refer to the file in the repository for the full source.]
-------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | - Full paths of source file(s) related to the manifestation of the issue 23 | - The location of the affected source code (tag/branch/commit or direct URL) 24 | - Any special configuration required to reproduce the issue 25 | - Step-by-step instructions to reproduce the issue 26 | - Proof-of-concept or exploit code (if possible) 27 | - Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /infra/core/storage/storage-account.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure storage account.'
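// Child blob/file/queue/table services below are only created when the
// corresponding array parameters (containers, files, queues, tables) are non-empty.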
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | @allowed([ 7 | 'Cool' 8 | 'Hot' 9 | 'Premium' ]) 10 | param accessTier string = 'Hot' 11 | param allowBlobPublicAccess bool = true 12 | param allowCrossTenantReplication bool = true 13 | param allowSharedKeyAccess bool = true 14 | param containers array = [] 15 | param corsRules array = [] 16 | param defaultToOAuthAuthentication bool = false 17 | param deleteRetentionPolicy object = {} 18 | @allowed([ 'AzureDnsZone', 'Standard' ]) 19 | param dnsEndpointType string = 'Standard' 20 | param files array = [] 21 | param kind string = 'StorageV2' 22 | param minimumTlsVersion string = 'TLS1_2' 23 | param queues array = [] 24 | param shareDeleteRetentionPolicy object = {} 25 | param supportsHttpsTrafficOnly bool = true 26 | param tables array = [] 27 | param networkAcls object = { 28 | bypass: 'AzureServices' 29 | defaultAction: 'Allow' 30 | } 31 | @allowed([ 'Enabled', 'Disabled' ]) 32 | param publicNetworkAccess string = 'Enabled' 33 | param sku object = { name: 'Standard_LRS' } 34 | 35 | resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = { 36 | name: name 37 | location: location 38 | tags: tags 39 | kind: kind 40 | sku: sku 41 | properties: { 42 | accessTier: accessTier 43 | allowBlobPublicAccess: allowBlobPublicAccess 44 | allowCrossTenantReplication: allowCrossTenantReplication 45 | allowSharedKeyAccess: allowSharedKeyAccess 46 | defaultToOAuthAuthentication: defaultToOAuthAuthentication 47 | dnsEndpointType: dnsEndpointType 48 | minimumTlsVersion: minimumTlsVersion 49 | networkAcls: networkAcls 50 | publicNetworkAccess: publicNetworkAccess 51 | supportsHttpsTrafficOnly: supportsHttpsTrafficOnly 52 | } 53 | 54 | resource blobServices 'blobServices' = if (!empty(containers)) { 55 | name: 'default' 56 | properties: { 57 | cors: { 58 | corsRules: corsRules 59 | } 60 | deleteRetentionPolicy: deleteRetentionPolicy 61 | } 62 | resource container 'containers' = [for container in containers: { 63 | name: container.name 64 | properties: { 65 | publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None' 66 | } 67 | }] 68 | } 69 | 70 | resource fileServices 'fileServices' = if (!empty(files)) { 71 | name: 'default' 72 | properties: { 73 | cors: { 74 | corsRules: corsRules 75 | } 76 | shareDeleteRetentionPolicy: shareDeleteRetentionPolicy 77 | } 78 | } 79 | 80 | resource queueServices 'queueServices' = if (!empty(queues)) { 81 | name: 'default' 82 | properties: { 83 | 84 | } 85 | resource queue 'queues' = [for queue in queues: { 86 | name: queue.name 87 | properties: { 88 | metadata: {} 89 | } 90 | }] 91 | } 92 | 93 | resource tableServices 'tableServices' = if (!empty(tables)) { 94 | name: 'default' 95 | properties: {} 96 | } 97 | } 98 | 99 | output id string = storage.id 100 | output name string = storage.name 101 | output primaryEndpoints object = storage.properties.primaryEndpoints 102 | -------------------------------------------------------------------------------- /docs/old-tutorial/02-session.md: -------------------------------------------------------------------------------- 1 | # Session 02 - Init `CosmosDB LC Vector Store` in the project 2 | 3 | In this session, we will set up the `CosmosDB LC vector store` to store the vectors generated by Azure OpenAI. 4 | 5 | ## Step 01 - Install packages 6 | 7 | Azure Cosmos DB for MongoDB vCore provides developers with a fully managed, scalable database that is compatible with MongoDB.
In this part of the tutorial, we will be using Azure Cosmos DB for MongoDB vCore to store vector search resources. 8 | 9 | > Learn more: **[Use vector search on embeddings in Azure Cosmos DB for MongoDB vCore](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search)** 10 | > Learn more: **[Understand embeddings in Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/understand-embeddings)** 11 | 12 | To start, let's install the following packages: 13 | 14 | ```bash 15 | npm install mongodb @langchain/community @langchain/core 16 | ``` 17 | 18 | Now, update the `package.json` file with the following content: 19 | 20 | - `package.json` 21 | 22 | ```json 23 | { 24 | "name": "api", 25 | "version": "1.0.0", 26 | "description": "a sample api demonstrating how to use Azure Functions with Azure OpenAI and LangChain.js", 27 | "scripts": { 28 | "build": "tsc", 29 | "watch": "tsc -w", 30 | "clean": "rimraf dist", 31 | "prestart": "npm run clean && npm run build", 32 | "start:host": "npm run prestart && func start", 33 | "start": "npm-run-all --parallel start:host watch", 34 | "test": "echo \"No tests yet...\"" 35 | }, 36 | "dependencies": { 37 | "@azure/functions": "^4.0.0", 38 | "@langchain/azure-openai": "^0.0.2", 39 | "@langchain/community": "^0.0.36", 40 | "dotenv": "^16.4.5", 41 | "langchain": "^0.1.25", 42 | "mongodb": "^6.5.0", 43 | "@langchain/core": "^0.1.44" 44 | }, 45 | "devDependencies": { 46 | "@types/node": "^18.x", 47 | "npm-run-all": "^4.1.5", 48 | "rimraf": "^5.0.0", 49 | "typescript": "^4.0.0" 50 | }, 51 | "main": "dist/src/index.js" 52 | } 53 | ``` 54 | 55 | ## Step 02 - Create a CosmosDB for MongoDB vCore cluster using Azure Portal 56 | 57 | Now we will need to create an Azure Cosmos DB for MongoDB vCore cluster. To do this, you can follow the step-by-step tutorial **[Quickstart: Create an Azure Cosmos DB for MongoDB vCore cluster by using the Azure portal](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/quickstart-portal)**. 58 | 59 | After creating the cluster, save the following information: 60 | 61 | - `cluster name` 62 | - `admin username` 63 | - `password` 64 | - `connection string` 65 | 66 | Once the cluster is created, you can find this information in its `Connection String` tab. 67 | 68 | After collecting this information, we'll include it in the project's `.env` file: 69 | 70 | - `.env` 71 | 72 | ```env 73 | AZURE_OPENAI_API_ENDPOINT="" 74 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME="" 75 | AZURE_OPENAI_API_KEY="" 76 | 77 | AZURE_COSMOSDB_CONNECTION_STRING="" 78 | ``` 79 | 80 | In the connection string, replace the `<admin username>:<password>` placeholder with the admin username and password you created in the previous step. 81 | 82 | Perfect! We now have our CosmosDB configured and ready to be used. 83 | 84 | Now that Azure CosmosDB is set up in our project, let's get back to our code! As a quick preview, the sketch below shows what initializing the vector store can look like; then we'll move on to the next step!
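This is a minimal, hypothetical sketch, not the final implementation: it assumes the `AzureCosmosDBVectorStore` class from `@langchain/community`, which reads `AZURE_COSMOSDB_CONNECTION_STRING` from the environment by default, and the database and collection names are placeholders. The actual wiring is covered in the next sessions.

```typescript
import 'dotenv/config';
import { AzureOpenAIEmbeddings } from '@langchain/azure-openai';
import { AzureCosmosDBVectorStore } from '@langchain/community/vectorstores/azure_cosmosdb';

// The connection string is picked up from AZURE_COSMOSDB_CONNECTION_STRING.
// 'documents-db' and 'vectors' are placeholder names; adjust them to your project.
const store = new AzureCosmosDBVectorStore(new AzureOpenAIEmbeddings(), {
  databaseName: 'documents-db',
  collectionName: 'vectors',
});
```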
85 | 86 | ▶ **[Next Step: Implement Upload API](./03-session.md)** 87 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # Serverless AI Chat with RAG using LangChain.js 2 | 3 | Serverless TypeScript Retrieval-Augmented Generation (RAG) chat sample: Lit + Vite frontend (Azure Static Web Apps), Azure Functions backend with LangChain.js, Cosmos DB vector store, Blob Storage for source documents, optional Azure OpenAI or local Ollama models. Provisioned by Bicep & Azure Developer CLI (azd) with CI/CD. Focus: reliability, citations, low cost, clear extension points. 4 | 5 | > **MISSION**: Provide a maintained Azure reference implementation of a serverless LangChain.js RAG chat that showcases best practices (citations, reliability, tooling) while staying lean and easy to extend. 6 | 7 | ## Overview 8 | 9 | - End-user asks questions in a web UI; backend performs RAG: embed/query vector store (Cosmos DB or in‑memory/faiss fallback), assemble context, invoke LLM (Azure OpenAI or local Ollama), stream answer + citations to client. 10 | - Documents (PDF/others) uploaded -> chunked & embedded -> stored for retrieval; blob storage keeps originals. 11 | - Architecture (high level): 12 | - Frontend: `packages/webapp` (Lit components, served locally by Vite, deployed via Static Web Apps) 13 | - Backend: `packages/api` (Azure Functions isolated worker w/ LangChain.js chains) 14 | - Data: Cosmos DB (vector and chat history), Blob Storage (docs) 15 | - Infra: `infra/` Bicep templates composed by `infra/main.bicep`, parameters in `infra/main.parameters.json` 16 | - Scripts: ingestion helper in `scripts/upload-documents.js` 17 | 18 | ## Key Technologies and Frameworks 19 | 20 | - TypeScript (monorepo via npm workspaces) 21 | - Azure Functions (Node.js runtime v4) + LangChain.js core/community providers 22 | - Lit + Vite for frontend UI 23 | - Azure Cosmos DB (vector store via @langchain/azure-cosmosdb) / faiss-node (local alt) 24 | - Azure Blob Storage (document source persistence) 25 | - Azure OpenAI / Ollama (LLM + embeddings) 26 | - Infrastructure as Code: Bicep + Azure Developer CLI (azd) 27 | - CI/CD: GitHub Actions 28 | 29 | ## Constraints and Requirements 30 | 31 | - Maintain simplicity; avoid premature abstractions or heavy frameworks 32 | - No proprietary dependencies beyond Azure services (prefer OSS + Azure) 33 | 34 | ## Development Workflow 35 | 36 | Root scripts (run from repository root): 37 | 38 | - `npm run start` – Launch webapp (`:8000`) and API Functions host (`:7071`) concurrently 39 | - `npm run build` – Build all workspaces 40 | - `npm run clean` – Clean build outputs 41 | - `npm run upload:docs` – Invoke ingestion script against local Functions host 42 | 43 | Backend (`packages/api`): 44 | 45 | - `npm run start` – Clean, build, start Functions host with TS watch 46 | - `npm run build` – TypeScript compile to `dist` 47 | 48 | Frontend (`packages/webapp`): 49 | 50 | - `npm run dev` – Vite dev server (port 8000) 51 | - `npm run build` – Production build 52 | 53 | ## Coding Guidelines 54 | 55 | - TypeScript strict-ish (reduced lint rules via XO config) balancing clarity for newcomers 56 | - Prettier enforced via lint-staged pre-commit hook 57 | - Favor explicit imports; keep functions small & composable 58 | 59 | ## Security Considerations 60 | 61 | - Secrets managed via Azure (Function App / Static Web App settings) – Avoid committing secrets 62 | - Test 
artifacts (traces, screenshots) must not include secrets → scrub logs & env variable exposure 63 | - Principle of least privilege in Bicep role assignments 64 | 65 | ## Extension Points 66 | 67 | - Swappable embeddings & LLM providers (Azure OpenAI ↔ Ollama) with minimal config changes 68 | 69 | ## Environment Variables (High-Level) 70 | 71 | - Azure OpenAI endpoints 72 | - Cosmos DB connection / database name 73 | - Blob storage account & container 74 | -------------------------------------------------------------------------------- /infra/core/host/functions.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Function in an existing Azure App Service plan.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | // Reference Properties 7 | param applicationInsightsName string = '' 8 | param appServicePlanId string 9 | param keyVaultName string = '' 10 | param managedIdentity bool = !empty(keyVaultName) || storageManagedIdentity 11 | param storageAccountName string 12 | param storageManagedIdentity bool = false 13 | param virtualNetworkSubnetId string = '' 14 | 15 | // Runtime Properties 16 | @allowed([ 17 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' 18 | ]) 19 | param runtimeName string 20 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' 21 | param runtimeVersion string 22 | 23 | // Function Settings 24 | @allowed([ 25 | '~4', '~3', '~2', '~1' 26 | ]) 27 | param extensionVersion string = '~4' 28 | 29 | // Microsoft.Web/sites Properties 30 | param kind string = 'functionapp,linux' 31 | 32 | // Microsoft.Web/sites/config 33 | param allowedOrigins array = [] 34 | param alwaysOn bool = true 35 | param appCommandLine string = '' 36 | @secure() 37 | param appSettings object = {} 38 | param clientAffinityEnabled bool = false 39 | param enableOryxBuild bool = contains(kind, 'linux') 40 | param functionAppScaleLimit int = -1 41 | param linuxFxVersion string = runtimeNameAndVersion 42 | param minimumElasticInstanceCount int = -1 43 | param numberOfWorkers int = -1 44 | param scmDoBuildDuringDeployment bool = true 45 | param use32BitWorkerProcess bool = false 46 | param healthCheckPath string = '' 47 | 48 | module functions 'appservice.bicep' = { 49 | name: '${name}-functions' 50 | params: { 51 | name: name 52 | location: location 53 | tags: tags 54 | allowedOrigins: allowedOrigins 55 | alwaysOn: alwaysOn 56 | appCommandLine: appCommandLine 57 | applicationInsightsName: applicationInsightsName 58 | appServicePlanId: appServicePlanId 59 | appSettings: union(appSettings, { 60 | FUNCTIONS_EXTENSION_VERSION: extensionVersion 61 | FUNCTIONS_WORKER_RUNTIME: runtimeName 62 | }, storageManagedIdentity ? 
{ 63 | AzureWebJobsStorage__accountName: storage.name 64 | } : { 65 | AzureWebJobsStorage: 'DefaultEndpointsProtocol=https;AccountName=${storage.name};AccountKey=${storage.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}' 66 | }) 67 | clientAffinityEnabled: clientAffinityEnabled 68 | enableOryxBuild: enableOryxBuild 69 | functionAppScaleLimit: functionAppScaleLimit 70 | healthCheckPath: healthCheckPath 71 | keyVaultName: keyVaultName 72 | kind: kind 73 | linuxFxVersion: linuxFxVersion 74 | managedIdentity: managedIdentity 75 | minimumElasticInstanceCount: minimumElasticInstanceCount 76 | numberOfWorkers: numberOfWorkers 77 | runtimeName: runtimeName 78 | runtimeVersion: runtimeVersion 79 | runtimeNameAndVersion: runtimeNameAndVersion 80 | scmDoBuildDuringDeployment: scmDoBuildDuringDeployment 81 | use32BitWorkerProcess: use32BitWorkerProcess 82 | virtualNetworkSubnetId: virtualNetworkSubnetId 83 | } 84 | } 85 | 86 | module storageOwnerRole '../../core/security/role.bicep' = if (storageManagedIdentity) { 87 | name: 'search-index-contrib-role-api' 88 | params: { 89 | principalId: functions.outputs.identityPrincipalId 90 | // Storage Blob Data Contributor 91 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' 92 | principalType: 'ServicePrincipal' 93 | } 94 | } 95 | 96 | resource storage 'Microsoft.Storage/storageAccounts@2021-09-01' existing = { 97 | name: storageAccountName 98 | } 99 | 100 | output id string = functions.outputs.id 101 | output identityPrincipalId string = managedIdentity ? functions.outputs.identityPrincipalId : '' 102 | output name string = functions.outputs.name 103 | output uri string = functions.outputs.uri 104 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "serverless-chat-langchainjs", 3 | "version": "1.0.0", 4 | "description": "Build your own serverless AI chat with Retrieval-Augmented-Generation using LangChain.js, TypeScript and Azure", 5 | "private": true, 6 | "type": "module", 7 | "scripts": { 8 | "start": "concurrently \"npm:start:*\" --kill-others", 9 | "start:webapp": "npm run dev --workspace=webapp", 10 | "start:api": "npm run start --workspace=api", 11 | "build": "npm run build --workspaces --if-present", 12 | "build:webapp": "npm run build --workspace=webapp", 13 | "build:api": "npm run build --workspace=api", 14 | "clean": "npm run clean --workspaces --if-present", 15 | "upload:docs": "node scripts/upload-documents.js http://localhost:7071", 16 | "lint": "xo", 17 | "lint:fix": "xo --fix", 18 | "format": "prettier --list-different --write .", 19 | "prepare": "simple-git-hooks || echo 'simple-git-hooks install skipped'" 20 | }, 21 | "repository": { 22 | "type": "git", 23 | "url": "https://github.com/Azure-Samples/serverless-chat-langchainjs.git" 24 | }, 25 | "homepage": "https://github.com/Azure-Samples/serverless-chat-langchainjs", 26 | "bugs": { 27 | "url": "https://github.com/Azure-Samples/serverless-chat-langchainjs/issues" 28 | }, 29 | "keywords": [ 30 | "langchainjs", 31 | "chatbot", 32 | "retrieval-augmented-generation", 33 | "serverless", 34 | "typescript", 35 | "azure", 36 | "azure-functions", 37 | "azure-static-web-apps", 38 | "azure-ai-search", 39 | "mongodb" 40 | ], 41 | "author": "Microsoft", 42 | "license": "MIT", 43 | "workspaces": [ 44 | "packages/*" 45 | ], 46 | "devDependencies": { 47 | "concurrently": "^9.0.0", 48 | "lint-staged": "^16.0.0", 49 | "prettier": 
"^3.0.3", 50 | "rimraf": "^6.0.1", 51 | "simple-git-hooks": "^2.9.0", 52 | "typescript": "^5.3.3", 53 | "xo": "^0.60.0" 54 | }, 55 | "engines": { 56 | "node": ">=20", 57 | "npm": ">=10" 58 | }, 59 | "simple-git-hooks": { 60 | "pre-commit": "npx lint-staged" 61 | }, 62 | "lint-staged": { 63 | "*.{js,ts,md,yaml,yml,html,css}": "prettier --write", 64 | "*.{js,ts}": "xo --fix" 65 | }, 66 | "xo": { 67 | "space": true, 68 | "prettier": true, 69 | "envs": [ 70 | "node" 71 | ], 72 | "rules": { 73 | "@typescript-eslint/triple-slash-reference": "off", 74 | "@typescript-eslint/naming-convention": "off", 75 | "@typescript-eslint/consistent-type-definitions": "off", 76 | "@typescript-eslint/member-ordering": "off", 77 | "@typescript-eslint/no-unsafe-assignment": "off", 78 | "@typescript-eslint/prefer-nullish-coalescing": "off", 79 | "@typescript-eslint/no-unsafe-return": "off", 80 | "@typescript-eslint/no-unsafe-argument": "off", 81 | "@typescript-eslint/consistent-type-imports": "off", 82 | "@typescript-eslint/no-empty-function": "off", 83 | "@typescript-eslint/no-floating-promises": "off", 84 | "@typescript-eslint/ban-types": "off", 85 | "import/no-unassigned-import": "off", 86 | "import/extensions": "off", 87 | "n/prefer-global/process": "off", 88 | "no-new-func": "off", 89 | "unicorn/prefer-global-this": "off", 90 | "unicorn/prefer-module": "off", 91 | "unicorn/no-new-array": "off", 92 | "unicorn/prevent-abbreviations": [ 93 | "error", 94 | { 95 | "allowList": { 96 | "combineDocsChain": true, 97 | "env": true, 98 | "Db": true 99 | } 100 | } 101 | ] 102 | } 103 | }, 104 | "prettier": { 105 | "tabWidth": 2, 106 | "semi": true, 107 | "singleQuote": true, 108 | "printWidth": 120, 109 | "bracketSpacing": true, 110 | "overrides": [ 111 | { 112 | "files": [ 113 | "*.json" 114 | ], 115 | "options": { 116 | "parser": "json" 117 | } 118 | } 119 | ] 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to [project-title] 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 6 | 7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | 15 | - [Code of Conduct](#coc) 16 | - [Issues and Bugs](#issue) 17 | - [Feature Requests](#feature) 18 | - [Submission Guidelines](#submit) 19 | 20 | ## Code of Conduct 21 | 22 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 23 | 24 | ## Found an Issue? 
25 | 26 | If you find a bug in the source code or a mistake in the documentation, you can help us by 27 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can 28 | [submit a Pull Request](#submit-pr) with a fix. 29 | 30 | ## Want a Feature? 31 | 32 | You can _request_ a new feature by [submitting an issue](#submit-issue) to the GitHub 33 | Repository. If you would like to _implement_ a new feature, please submit an issue with 34 | a proposal for your work first, to be sure that we can use it. 35 | 36 | - **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr). 37 | 38 | ## Submission Guidelines 39 | 40 | ### Submitting an Issue 41 | 42 | Before you submit an issue, search the archive; your question may already have been answered. 43 | 44 | If your issue appears to be a bug and hasn't been reported yet, open a new issue. 45 | Help us maximize the effort we can spend fixing issues and adding new 46 | features by not reporting duplicate issues. Providing the following information will increase the 47 | chances of your issue being dealt with quickly: 48 | 49 | - **Overview of the Issue** - if an error is being thrown, a non-minified stack trace helps 50 | - **Version** - what version is affected (e.g. 0.1.2) 51 | - **Motivation for or Use Case** - explain what you are trying to do and why the current behavior is a bug for you 52 | - **Browsers and Operating System** - is this a problem with all browsers? 53 | - **Reproduce the Error** - provide a live example or an unambiguous set of steps 54 | - **Related Issues** - has a similar issue been reported before? 55 | - **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be 56 | causing the problem (line of code or commit) 57 | 58 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new. 59 | 60 | ### Submitting a Pull Request (PR) 61 | 62 | Before you submit your Pull Request (PR), consider the following guidelines: 63 | 64 | - Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR 65 | that relates to your submission. You don't want to duplicate effort. 66 | 67 | - Make your changes in a new git fork: 68 | 69 | - Commit your changes using a descriptive commit message 70 | - Push your fork to GitHub: 71 | - In GitHub, create a pull request 72 | - If we suggest changes then: 73 | 74 | - Make the required updates. 75 | - Rebase your fork and force push to your GitHub repository (this will update your Pull Request): 76 | 77 | ```shell 78 | git rebase master -i 79 | git push -f 80 | ``` 81 | 82 | That's it! Thank you for your contribution!
83 | -------------------------------------------------------------------------------- /packages/api/src/functions/documents-post.ts: -------------------------------------------------------------------------------- 1 | import fs from 'node:fs/promises'; 2 | import { type HttpRequest, type HttpResponseInit, type InvocationContext, app } from '@azure/functions'; 3 | import { AzureOpenAIEmbeddings } from '@langchain/openai'; 4 | import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 5 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 6 | import { AzureCosmosDBNoSQLVectorStore } from '@langchain/azure-cosmosdb'; 7 | import { OllamaEmbeddings } from '@langchain/ollama'; 8 | import { FaissStore } from '@langchain/community/vectorstores/faiss'; 9 | import 'dotenv/config'; 10 | import { BlobServiceClient } from '@azure/storage-blob'; 11 | import { badRequest, serviceUnavailable, ok } from '../http-response.js'; 12 | import { ollamaEmbeddingsModel, faissStoreFolder } from '../constants.js'; 13 | import { getAzureOpenAiTokenProvider, getCredentials } from '../security.js'; 14 | 15 | export async function postDocuments(request: HttpRequest, context: InvocationContext): Promise { 16 | const storageUrl = process.env.AZURE_STORAGE_URL; 17 | const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME; 18 | const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT; 19 | 20 | try { 21 | // Get the uploaded file from the request 22 | const parsedForm = await request.formData(); 23 | 24 | if (!parsedForm.has('file')) { 25 | return badRequest('"file" field not found in form data.'); 26 | } 27 | 28 | // Type mismatch between Node.js FormData and Azure Functions FormData 29 | const file = parsedForm.get('file') as any as File; 30 | const filename = file.name; 31 | 32 | // Extract text from the PDF 33 | const loader = new PDFLoader(file, { 34 | splitPages: false, 35 | }); 36 | const rawDocument = await loader.load(); 37 | rawDocument[0].metadata.source = filename; 38 | 39 | // Split the text into smaller chunks 40 | const splitter = new RecursiveCharacterTextSplitter({ 41 | chunkSize: 1500, 42 | chunkOverlap: 100, 43 | }); 44 | const documents = await splitter.splitDocuments(rawDocument); 45 | 46 | // Generate embeddings and save in database 47 | if (azureOpenAiEndpoint) { 48 | const credentials = getCredentials(); 49 | const azureADTokenProvider = getAzureOpenAiTokenProvider(); 50 | 51 | // Initialize embeddings model and vector database 52 | const embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider }); 53 | await AzureCosmosDBNoSQLVectorStore.fromDocuments(documents, embeddings, { credentials }); 54 | } else { 55 | // If no environment variables are set, it means we are running locally 56 | context.log('No Azure OpenAI endpoint set, using Ollama models and local DB'); 57 | const embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel }); 58 | const folderExists = await checkFolderExists(faissStoreFolder); 59 | if (folderExists) { 60 | const store = await FaissStore.load(faissStoreFolder, embeddings); 61 | await store.addDocuments(documents); 62 | await store.save(faissStoreFolder); 63 | } else { 64 | const store = await FaissStore.fromDocuments(documents, embeddings, {}); 65 | await store.save(faissStoreFolder); 66 | } 67 | } 68 | 69 | if (storageUrl && containerName) { 70 | // Upload the PDF file to Azure Blob Storage 71 | context.log(`Uploading file to blob storage: "${containerName}/${filename}"`); 72 | const credentials = 
getCredentials(); 73 | const blobServiceClient = new BlobServiceClient(storageUrl, credentials); 74 | const containerClient = blobServiceClient.getContainerClient(containerName); 75 | const blockBlobClient = containerClient.getBlockBlobClient(filename); 76 | const buffer = await file.arrayBuffer(); 77 | await blockBlobClient.upload(buffer, file.size, { 78 | blobHTTPHeaders: { blobContentType: 'application/pdf' }, 79 | }); 80 | } else { 81 | context.log('No Azure Blob Storage connection string set, skipping upload.'); 82 | } 83 | 84 | return ok({ message: 'PDF file uploaded successfully.' }); 85 | } catch (_error: unknown) { 86 | const error = _error as Error; 87 | context.error(`Error when processing document-post request: ${error.message}`); 88 | 89 | return serviceUnavailable('Service temporarily unavailable. Please try again later.'); 90 | } 91 | } 92 | 93 | async function checkFolderExists(folderPath: string): Promise { 94 | try { 95 | const stats = await fs.stat(folderPath); 96 | return stats.isDirectory(); 97 | } catch { 98 | return false; 99 | } 100 | } 101 | 102 | app.http('documents-post', { 103 | route: 'documents', 104 | methods: ['POST'], 105 | authLevel: 'anonymous', 106 | handler: postDocuments, 107 | }); 108 | -------------------------------------------------------------------------------- /infra/core/host/appservice.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure App Service in an existing Azure App Service plan.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | // Reference Properties 7 | param applicationInsightsName string = '' 8 | param appServicePlanId string 9 | param keyVaultName string = '' 10 | param managedIdentity bool = !empty(keyVaultName) 11 | 12 | // Runtime Properties 13 | @allowed([ 14 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' 15 | ]) 16 | param runtimeName string 17 | param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' 18 | param runtimeVersion string 19 | 20 | // Microsoft.Web/sites Properties 21 | param kind string = 'app,linux' 22 | 23 | // Microsoft.Web/sites/config 24 | param allowedOrigins array = [] 25 | param alwaysOn bool = true 26 | param appCommandLine string = '' 27 | @secure() 28 | param appSettings object = {} 29 | param clientAffinityEnabled bool = false 30 | param enableOryxBuild bool = contains(kind, 'linux') 31 | param functionAppScaleLimit int = -1 32 | param linuxFxVersion string = runtimeNameAndVersion 33 | param minimumElasticInstanceCount int = -1 34 | param numberOfWorkers int = -1 35 | param scmDoBuildDuringDeployment bool = false 36 | param use32BitWorkerProcess bool = false 37 | param ftpsState string = 'FtpsOnly' 38 | param healthCheckPath string = '' 39 | param virtualNetworkSubnetId string = '' 40 | 41 | resource appService 'Microsoft.Web/sites@2022-03-01' = { 42 | name: name 43 | location: location 44 | tags: tags 45 | kind: kind 46 | properties: { 47 | serverFarmId: appServicePlanId 48 | siteConfig: { 49 | linuxFxVersion: linuxFxVersion 50 | alwaysOn: alwaysOn 51 | ftpsState: ftpsState 52 | minTlsVersion: '1.2' 53 | appCommandLine: appCommandLine 54 | numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null 55 | minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? 
minimumElasticInstanceCount : null 56 | use32BitWorkerProcess: use32BitWorkerProcess 57 | functionAppScaleLimit: functionAppScaleLimit != -1 ? functionAppScaleLimit : null 58 | healthCheckPath: healthCheckPath 59 | cors: { 60 | allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) 61 | } 62 | } 63 | clientAffinityEnabled: clientAffinityEnabled 64 | httpsOnly: true 65 | virtualNetworkSubnetId: !empty(virtualNetworkSubnetId) ? virtualNetworkSubnetId : null 66 | } 67 | 68 | identity: { type: managedIdentity ? 'SystemAssigned' : 'None' } 69 | 70 | resource basicPublishingCredentialsPoliciesFtp 'basicPublishingCredentialsPolicies' = { 71 | name: 'ftp' 72 | properties: { 73 | allow: false 74 | } 75 | } 76 | 77 | resource basicPublishingCredentialsPoliciesScm 'basicPublishingCredentialsPolicies' = { 78 | name: 'scm' 79 | properties: { 80 | allow: false 81 | } 82 | } 83 | } 84 | 85 | // Updates to the single Microsoft.sites/web/config resources that need to be performed sequentially 86 | // sites/web/config 'appsettings' 87 | module configAppSettings 'appservice-appsettings.bicep' = { 88 | name: '${name}-appSettings' 89 | params: { 90 | name: appService.name 91 | appSettings: union(appSettings, 92 | { 93 | SCM_DO_BUILD_DURING_DEPLOYMENT: string(scmDoBuildDuringDeployment) 94 | ENABLE_ORYX_BUILD: string(enableOryxBuild) 95 | }, 96 | runtimeName == 'python' && appCommandLine == '' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {}, 97 | !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}, 98 | !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {}) 99 | } 100 | } 101 | 102 | // sites/web/config 'logs' 103 | resource configLogs 'Microsoft.Web/sites/config@2022-03-01' = { 104 | name: 'logs' 105 | parent: appService 106 | properties: { 107 | applicationLogs: { fileSystem: { level: 'Verbose' } } 108 | detailedErrorMessages: { enabled: true } 109 | failedRequestsTracing: { enabled: true } 110 | httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } } 111 | } 112 | dependsOn: [configAppSettings] 113 | } 114 | 115 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) { 116 | name: keyVaultName 117 | } 118 | 119 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { 120 | name: applicationInsightsName 121 | } 122 | 123 | output id string = appService.id 124 | output identityPrincipalId string = managedIdentity ? appService.identity.principalId : '' 125 | output name string = appService.name 126 | output uri string = 'https://${appService.properties.defaultHostName}' 127 | -------------------------------------------------------------------------------- /infra/core/host/functions-flex.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Function (flex consumption) in an existing Azure App Service plan.' 
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | // Reference Properties 7 | param applicationInsightsName string = '' 8 | param appServicePlanId string 9 | param keyVaultName string = '' 10 | param virtualNetworkSubnetId string = '' 11 | 12 | // Runtime Properties 13 | @allowed([ 14 | 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' 15 | ]) 16 | param runtimeName string 17 | @allowed(['3.10', '3.11', '7.4', '8.0', '10', '11', '17', '20']) 18 | param runtimeVersion string 19 | 20 | // Microsoft.Web/sites Properties 21 | param kind string = 'functionapp,linux' 22 | 23 | // Microsoft.Web/sites/config 24 | param allowedOrigins array = [] 25 | param alwaysOn bool = true 26 | param appCommandLine string = '' 27 | @secure() 28 | param appSettings object = {} 29 | param clientAffinityEnabled bool = false 30 | param maximumInstanceCount int = 800 31 | param instanceMemoryMB int = 2048 32 | param minimumElasticInstanceCount int = -1 33 | param numberOfWorkers int = -1 34 | param healthCheckPath string = '' 35 | param storageAccountName string 36 | 37 | resource functions 'Microsoft.Web/sites@2023-12-01' = { 38 | name: name 39 | location: location 40 | tags: tags 41 | kind: kind 42 | properties: { 43 | serverFarmId: appServicePlanId 44 | siteConfig: { 45 | ftpsState: 'FtpsOnly' 46 | alwaysOn: alwaysOn 47 | minTlsVersion: '1.2' 48 | appCommandLine: appCommandLine 49 | numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null 50 | minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? minimumElasticInstanceCount : null 51 | healthCheckPath: healthCheckPath 52 | cors: { 53 | allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) 54 | } 55 | } 56 | functionAppConfig: { 57 | deployment: { 58 | storage: { 59 | type: 'blobContainer' 60 | value: '${storage.properties.primaryEndpoints.blob}${name}' 61 | authentication: { 62 | type: 'SystemAssignedIdentity' 63 | } 64 | } 65 | } 66 | scaleAndConcurrency: { 67 | maximumInstanceCount: maximumInstanceCount 68 | instanceMemoryMB: instanceMemoryMB 69 | } 70 | runtime: { 71 | name: runtimeName 72 | version: runtimeVersion 73 | } 74 | } 75 | clientAffinityEnabled: clientAffinityEnabled 76 | httpsOnly: true 77 | virtualNetworkSubnetId: !empty(virtualNetworkSubnetId) ? virtualNetworkSubnetId : null 78 | } 79 | 80 | identity: { type: 'SystemAssigned' } 81 | } 82 | 83 | // Updates to the single Microsoft.sites/web/config resources that need to be performed sequentially 84 | // sites/web/config 'appsettings' 85 | module configAppSettings 'appservice-appsettings.bicep' = { 86 | name: '${name}-appSettings' 87 | params: { 88 | name: functions.name 89 | appSettings: union(appSettings, 90 | { 91 | AzureWebJobsStorage__accountName: storage.name 92 | }, 93 | runtimeName == 'python' && appCommandLine == '' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {}, 94 | !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}, 95 | !empty(keyVaultName) ? 
{ AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {}) 96 | } 97 | } 98 | 99 | // sites/web/config 'logs' 100 | resource configLogs 'Microsoft.Web/sites/config@2022-03-01' = { 101 | name: 'logs' 102 | parent: functions 103 | properties: { 104 | applicationLogs: { fileSystem: { level: 'Verbose' } } 105 | detailedErrorMessages: { enabled: true } 106 | failedRequestsTracing: { enabled: true } 107 | httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } } 108 | } 109 | dependsOn: [configAppSettings] 110 | } 111 | 112 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) { 113 | name: keyVaultName 114 | } 115 | 116 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { 117 | name: applicationInsightsName 118 | } 119 | 120 | resource storage 'Microsoft.Storage/storageAccounts@2021-09-01' existing = { 121 | name: storageAccountName 122 | } 123 | 124 | var storageContributorRole = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') 125 | 126 | resource storageContainer 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 127 | scope: storage // Use when specifying a scope that is different than the deployment scope 128 | name: guid(subscription().id, resourceGroup().id, functions.id, storageContributorRole) 129 | properties: { 130 | roleDefinitionId: storageContributorRole 131 | principalType: 'ServicePrincipal' 132 | principalId: functions.identity.principalId 133 | } 134 | } 135 | 136 | output id string = functions.id 137 | output identityPrincipalId string = functions.identity.principalId 138 | output name string = functions.name 139 | output uri string = 'https://${functions.properties.defaultHostName}' 140 | -------------------------------------------------------------------------------- /infra/abbreviations.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysisServicesServers": "as", 3 | "apiManagementService": "apim-", 4 | "appConfigurationStores": "appcs-", 5 | "appManagedEnvironments": "cae-", 6 | "appContainerApps": "ca-", 7 | "authorizationPolicyDefinitions": "policy-", 8 | "automationAutomationAccounts": "aa-", 9 | "blueprintBlueprints": "bp-", 10 | "blueprintBlueprintsArtifacts": "bpa-", 11 | "cacheRedis": "redis-", 12 | "cdnProfiles": "cdnp-", 13 | "cdnProfilesEndpoints": "cdne-", 14 | "cognitiveServicesAccounts": "cog-", 15 | "cognitiveServicesFormRecognizer": "cog-fr-", 16 | "cognitiveServicesTextAnalytics": "cog-ta-", 17 | "cognitiveServicesSpeech": "cog-sp-", 18 | "computeAvailabilitySets": "avail-", 19 | "computeCloudServices": "cld-", 20 | "computeDiskEncryptionSets": "des", 21 | "computeDisks": "disk", 22 | "computeDisksOs": "osdisk", 23 | "computeGalleries": "gal", 24 | "computeSnapshots": "snap-", 25 | "computeVirtualMachines": "vm", 26 | "computeVirtualMachineScaleSets": "vmss-", 27 | "containerInstanceContainerGroups": "ci", 28 | "containerRegistryRegistries": "cr", 29 | "containerServiceManagedClusters": "aks-", 30 | "databricksWorkspaces": "dbw-", 31 | "dataFactoryFactories": "adf-", 32 | "dataLakeAnalyticsAccounts": "dla", 33 | "dataLakeStoreAccounts": "dls", 34 | "dataMigrationServices": "dms-", 35 | "dBforMySQLServers": "mysql-", 36 | "dBforPostgreSQLServers": "psql-", 37 | "devicesIotHubs": "iot-", 38 | "devicesProvisioningServices": "provs-", 39 | "devicesProvisioningServicesCertificates": "pcert-", 40 | 
"documentDBDatabaseAccounts": "cosmos-", 41 | "eventGridDomains": "evgd-", 42 | "eventGridDomainsTopics": "evgt-", 43 | "eventGridEventSubscriptions": "evgs-", 44 | "eventHubNamespaces": "evhns-", 45 | "eventHubNamespacesEventHubs": "evh-", 46 | "hdInsightClustersHadoop": "hadoop-", 47 | "hdInsightClustersHbase": "hbase-", 48 | "hdInsightClustersKafka": "kafka-", 49 | "hdInsightClustersMl": "mls-", 50 | "hdInsightClustersSpark": "spark-", 51 | "hdInsightClustersStorm": "storm-", 52 | "hybridComputeMachines": "arcs-", 53 | "insightsActionGroups": "ag-", 54 | "insightsComponents": "appi-", 55 | "keyVaultVaults": "kv-", 56 | "kubernetesConnectedClusters": "arck", 57 | "kustoClusters": "dec", 58 | "kustoClustersDatabases": "dedb", 59 | "loadTesting": "lt-", 60 | "logicIntegrationAccounts": "ia-", 61 | "logicWorkflows": "logic-", 62 | "machineLearningServicesWorkspaces": "mlw-", 63 | "managedIdentityUserAssignedIdentities": "id-", 64 | "managementManagementGroups": "mg-", 65 | "migrateAssessmentProjects": "migr-", 66 | "networkApplicationGateways": "agw-", 67 | "networkApplicationSecurityGroups": "asg-", 68 | "networkAzureFirewalls": "afw-", 69 | "networkBastionHosts": "bas-", 70 | "networkConnections": "con-", 71 | "networkDnsZones": "dnsz-", 72 | "networkExpressRouteCircuits": "erc-", 73 | "networkFirewallPolicies": "afwp-", 74 | "networkFirewallPoliciesWebApplication": "waf", 75 | "networkFirewallPoliciesRuleGroups": "wafrg", 76 | "networkFrontDoors": "fd-", 77 | "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", 78 | "networkLoadBalancersExternal": "lbe-", 79 | "networkLoadBalancersInternal": "lbi-", 80 | "networkLoadBalancersInboundNatRules": "rule-", 81 | "networkLocalNetworkGateways": "lgw-", 82 | "networkNatGateways": "ng-", 83 | "networkNetworkInterfaces": "nic-", 84 | "networkNetworkSecurityGroups": "nsg-", 85 | "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", 86 | "networkNetworkWatchers": "nw-", 87 | "networkPrivateDnsZones": "pdnsz-", 88 | "networkPrivateLinkServices": "pl-", 89 | "networkPublicIPAddresses": "pip-", 90 | "networkPublicIPPrefixes": "ippre-", 91 | "networkRouteFilters": "rf-", 92 | "networkRouteTables": "rt-", 93 | "networkRouteTablesRoutes": "udr-", 94 | "networkTrafficManagerProfiles": "traf-", 95 | "networkVirtualNetworkGateways": "vgw-", 96 | "networkVirtualNetworks": "vnet-", 97 | "networkVirtualNetworksSubnets": "snet-", 98 | "networkVirtualNetworksVirtualNetworkPeerings": "peer-", 99 | "networkVirtualWans": "vwan-", 100 | "networkVpnGateways": "vpng-", 101 | "networkVpnGatewaysVpnConnections": "vcn-", 102 | "networkVpnGatewaysVpnSites": "vst-", 103 | "notificationHubsNamespaces": "ntfns-", 104 | "notificationHubsNamespacesNotificationHubs": "ntf-", 105 | "operationalInsightsWorkspaces": "log-", 106 | "portalDashboards": "dash-", 107 | "powerBIDedicatedCapacities": "pbi-", 108 | "purviewAccounts": "pview-", 109 | "recoveryServicesVaults": "rsv-", 110 | "resourcesResourceGroups": "rg-", 111 | "searchSearchServices": "srch-", 112 | "serviceBusNamespaces": "sb-", 113 | "serviceBusNamespacesQueues": "sbq-", 114 | "serviceBusNamespacesTopics": "sbt-", 115 | "serviceEndPointPolicies": "se-", 116 | "serviceFabricClusters": "sf-", 117 | "signalRServiceSignalR": "sigr", 118 | "sqlManagedInstances": "sqlmi-", 119 | "sqlServers": "sql-", 120 | "sqlServersDataWarehouse": "sqldw-", 121 | "sqlServersDatabases": "sqldb-", 122 | "sqlServersDatabasesStretch": "sqlstrdb-", 123 | "storageStorageAccounts": "st", 124 | "storageStorageAccountsVm": "stvm", 125 | 
"storSimpleManagers": "ssimp", 126 | "streamAnalyticsCluster": "asa-", 127 | "synapseWorkspaces": "syn", 128 | "synapseWorkspacesAnalyticsWorkspaces": "synw", 129 | "synapseWorkspacesSqlPoolsDedicated": "syndp", 130 | "synapseWorkspacesSqlPoolsSpark": "synsp", 131 | "timeSeriesInsightsEnvironments": "tsi-", 132 | "webServerFarms": "plan-", 133 | "webSitesAppService": "app-", 134 | "webSitesAppServiceEnvironment": "ase-", 135 | "webSitesFunctions": "func-", 136 | "webStaticSites": "stapp-" 137 | } 138 | -------------------------------------------------------------------------------- /docs/tutorial/03-understanding-rag.md: -------------------------------------------------------------------------------- 1 | # Understanding the RAG (Retrieval Augmented Generation) architecture 2 | 3 | In this section, we'll understand what the RAG (Retrieval Augmented Generation) architecture is, how it works, and why it's important for integration with AI models. 4 | 5 | ## What is RAG (Retrieval Augmented Generation)? 6 | 7 | RAG is an architecture that integrates external information retrieval into the response generation process of Large Language Models (LLMs) 8 | 9 | It allows searching for a specific database, in addition to the pre-trained knowledge base, to significantly improve the accuracy and relevance of answers. 10 | 11 | In the business context, RAG architecture enables generative artificial intelligence (AI) to focus exclusively on company-relevant content. This allows AI to work with specific information, such as documents, vectorized images, and other types of business data. To achieve this, AI models must be capable of understanding and processing these specific types of content. 12 | 13 | In simple terms, RAG architecture enables organizations to use AI to analyze and generate information from their specific data, including texts and images that are related to their business, in a controlled and targeted manner. 14 | 15 | ## RAG Architecture Components 16 | 17 | ![RAG](./images/rag.png) 18 | 19 | Implement the standard RAG architecture following this flow. It should include: 20 | 21 | 1. **Ingestion:** 22 | 23 | - **How it works:** Indexing is the process of organizing data in a vector database to make it easily searchable. This critical step allows the RAG to access relevant information quickly when responding to a query. 24 | - **Mechanism:** Starts with the collection of documents, which are divided into smaller chunks by a **splitter**. Complex algorithms transform each piece of text into an embedding vector, which is then stored in the database for efficient retrieval of similar information. 25 | 26 | 2. **Retrieval:** 27 | 28 | - **How it works:** This process uses vector similarity search to find the most relevant documents or passages to answer a query. 29 | - **Mechanisms:** 30 | 31 | - **Sparse vector representations:** Use sparse vector representations to be texts through vectors that highlight specific characteristics of the data. These representations are called **sparse** because they tend to have many zero values, focusing only on specific aspects such as the presence of certain key words or phrases. This type of representation is useful for research based on specific terms but may not capture the full semantics of the text well. 32 | 33 | - **Dense vector embeddings:** Use language models to encode queries and passages in dense vectors, which are stored in vector databases and allow retrieval based on semantic similarity. 
34 |
35 |      - **Hybrid search:** Combines the techniques of keyword search and semantic search to take advantage of the strengths of both types of representations. Hybrid search improves the quality of results by maximizing the relevance and precision of retrieved information.
36 |
37 | 3. **Generation:**
38 |
39 |    - **How it works:** With the most relevant passages retrieved, the generator's task is to produce a final response, synthesizing and expressing this information in natural language.
40 |    - **Mechanisms:** The generator, which is typically a model like GPT, BART, or T5, uses both the query and the relevant documents found by the retriever to create its response. It is important to note that the generator relies on the retriever to find the relevant documents. A condensed code sketch of the full ingestion, retrieval, and generation flow is shown after the industry examples below.
41 |
42 | ## Why is RAG architecture important for integration with AI models?
43 |
44 | RAG architecture is useful for AI models in business contexts. It allows for flexible and efficient integration with various databases, improving the relevance and accuracy of generated responses while customizing the application to meet specific business needs.
45 |
46 | Here are some advantages of integrating RAG architecture with AI models:
47 |
48 | ### Adapting the RAG Architecture to Different Enterprise Databases
49 |
50 | The RAG architecture can be configured to work with a variety of databases. It can adapt to access and incorporate information from various sources as needed. This is possible because the retrieval component of the architecture can interact with different data formats and structures, from traditional relational databases to document repositories or content management systems.
51 |
52 | Examples of data types that can be integrated:
53 |
54 | 1. **Textual documents:** Internal documents, analysis reports, procedure manuals, and technical documentation. RAG can retrieve relevant information from these documents to answer specific questions that require the detailed knowledge contained in them.
55 |
56 | 2. **Relational Databases:** Structured data such as customer records, financial transactions, and inventory records. Although traditionally not the focus of LLMs, by integrating RAG, AI models can extract and use information from tables and databases to enrich their answers or perform specific analyses.
57 |
58 | 3. **Social media data and customer feedback:** Comments and reviews that can be used to better understand market trends, consumer sentiment, and to answer questions related to customer service.
59 |
60 | 4. **Image and Video Databases:** Through descriptions or metadata associated with media, RAG can retrieve pertinent visual information to answer queries involving image identification or visual content analysis.
61 |
62 | ### Applications of RAG Across Industries
63 |
64 | RAG has significant implications in many fields:
65 |
66 | - **Legal Research:** Legal professionals can access and review relevant case law and precedents quickly.
67 |
68 | - **Medical Diagnosis:** Healthcare professionals can retrieve up-to-date patient records and research to support diagnosis and treatment plans.
69 |
70 | - **Customer Support:** Service agents can provide responses based on the latest product information and manuals.
71 |
72 | - **Market Analysis:** Analysts can use the latest market data and trends to support business decisions.
73 |
74 | - **Educational Content:** Educators can update their materials with the latest research and studies to ensure relevance and accuracy.
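To make this flow concrete, here is a condensed TypeScript sketch of the ingestion, retrieval, and generation steps, using the same LangChain.js packages this project relies on elsewhere. The sample texts, the question, and the locally running Ollama models are assumptions for illustration only:

```typescript
import { ChatOllama, OllamaEmbeddings } from '@langchain/ollama';
import { FaissStore } from '@langchain/community/vectorstores/faiss';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';

// Assumes Ollama is running locally with these models pulled
const embeddings = new OllamaEmbeddings({ model: 'nomic-embed-text' });
const model = new ChatOllama({ model: 'llama3.1' });

// 1. Ingestion: embed small text chunks and index them in a vector store
const store = await FaissStore.fromTexts(
  ['The lease term is 12 months.', 'Deposits are refunded within 30 days of move-out.'],
  [{ source: 'terms.pdf' }, { source: 'terms.pdf' }],
  embeddings,
);

// 2. Retrieval: find the chunks most similar to the user question
const question = 'When do I get my deposit back?';
const context = await store.asRetriever(3).invoke(question);

// 3. Generation: inject the retrieved chunks into the prompt and answer
const ragChain = await createStuffDocumentsChain({
  llm: model,
  prompt: ChatPromptTemplate.fromMessages([
    ['system', 'Answer using only this context:\n\n{context}'],
    ['human', '{input}'],
  ]),
});
console.log(await ragChain.invoke({ input: question, context }));
```

This is, in miniature, the same shape as the `documents-post` and `chats-post` functions found elsewhere in this repository.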
75 |
76 | > **Note:** To learn more about the RAG architecture, please refer to the official [Azure Cosmos DB documentation](https://learn.microsoft.com/azure/cosmos-db/gen-ai/rag).
77 |
78 | ## Next Steps
79 |
80 | RAG architecture is a powerful tool for improving the accuracy and relevance of AI models' responses. It makes AI models a more effective solution for business scenarios and other contexts where access to specific information is essential.
81 |
82 | Now that we have a clear understanding of the RAG architecture, we can begin developing the functions that integrate AI models with the front end. In the next section, we will start developing the `chat-post` function, also referred to as the `chat` API, for integration with AI models.
83 |
84 | **[⬅️ Back: Setting Up the Serverless Environment using Azure Functions](02-setting-up-azure-functions.md)** | **[Next: Developing the `chat` API ➡️ ](./04-preparing-understanding-language-models.md)**
85 |
-------------------------------------------------------------------------------- /docs/faq.md: --------------------------------------------------------------------------------
1 | ## Frequently Asked Questions
2 |
3 | <details>
4 | <summary>What is Retrieval-Augmented Generation?</summary>
5 |
6 | Retrieval-Augmented Generation (RAG) is a method used in artificial intelligence, particularly in natural language processing, to generate text responses that are both contextually relevant and rich in content using AI models.
7 |
8 | At its core, RAG involves two main components:
9 |
10 | - **Retriever**: Think "_like a search engine_", finding relevant information from a knowledge base, usually a vector database. In this sample, we're using Azure Cosmos DB for NoSQL as our vector database.
11 |
12 | - **Generator**: Acts like a writer, taking the prompt and the retrieved information to create a response. Here we're using a Large Language Model (LLM) for this task.
13 |
15 | Retrieval-Augmented Generation schema 16 |
17 |
18 | </details>
19 |
20 | <details>
21 | <summary>How can we upload additional documents without redeploying everything?</summary>
22 |
23 | To upload more documents, first put your PDF document in the `data/` folder, then use one of these commands depending on your environment.
24 |
25 | ### For local development
26 |
27 | Make sure your API is started by running `npm run start:api` from the root of the project. Then you can use one of the following commands to upload a new PDF document:
28 |
29 | ```bash
30 | # If you're using a POSIX shell
31 | curl -F "file=@data/<your-document.pdf>" http://localhost:7071/api/documents
32 |
33 | # If you're using PowerShell
34 | Invoke-RestMethod -Uri "http://localhost:7071/api/documents" -Method Post -InFile "./data/<your-document.pdf>"
35 | ```
36 |
37 | You can also use the following command to re-upload all the PDF files in the `/data` folder at once:
38 |
39 | ```bash
40 | npm run upload:docs
41 | ```
42 |
43 | ### For the deployed version
44 |
45 | First you need to find the URL of the deployed function. You can either look at the `packages/api/.env` file and search for the `API_URI` variable, or run this command to get the URL:
46 |
47 | ```bash
48 | azd env get-values | grep API_URI
49 | ```
50 |
51 | Then you can use one of the following commands to upload a new PDF document:
52 |
53 | ```bash
54 | # If you're using a POSIX shell
55 | curl -F "file=@data/<your-document.pdf>" <api_url>/api/documents
56 |
57 | # If you're using PowerShell
58 | Invoke-RestMethod -Uri "<api_url>/api/documents" -Method Post -InFile "./data/<your-document.pdf>"
59 | ```
60 |
61 | You can also use the following command to re-upload all the PDF files in the `/data` folder at once:
62 |
63 | ```bash
64 | node scripts/upload-documents.js
65 | ```
66 |
67 | </details>
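Under the hood, `scripts/upload-documents.js` essentially posts each PDF in `data/` as multipart form data to the `/documents` endpoint. A minimal sketch of that idea is shown below; the `API_URI` fallback and the file filtering are assumptions, not the script's exact code:

```typescript
import fs from 'node:fs/promises';
import path from 'node:path';

// Assumed fallback when no API URL is configured (local Functions host)
const apiUrl = process.env.API_URI ?? 'http://localhost:7071';
const folder = 'data';

for (const name of await fs.readdir(folder)) {
  // Only upload PDF files, skip everything else (e.g. README.md)
  if (!name.toLowerCase().endsWith('.pdf')) continue;

  const buffer = await fs.readFile(path.join(folder, name));
  const formData = new FormData();
  formData.append('file', new Blob([buffer], { type: 'application/pdf' }), name);

  // Node.js 18+ provides fetch, FormData and Blob as globals
  const response = await fetch(`${apiUrl}/api/documents`, { method: 'POST', body: formData });
  console.log(`${name}: HTTP ${response.status}`);
}
```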
68 |
69 | <details>
70 | <summary>Why do we need to break up the documents into chunks?</summary>
71 |
72 | Chunking allows us to limit the amount of information we send to the LLM due to token limits. Breaking up the content also makes it easier to find the most relevant chunks of text to inject, improving the relevance of the results. The method of chunking we use leverages a sliding window of text such that sentences that end one chunk will start the next. This allows us to reduce the chance of losing the context of the text.
73 |
74 | </details>
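For reference, the ingestion code in `packages/api/src/functions/documents-post.ts` implements this with LangChain.js's recursive splitter. A condensed sketch follows; the placeholder input text is made up:

```typescript
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';

const longDocumentText = 'Imagine the full text of a support guide here...'; // placeholder input

// Same configuration as the documents-post function: chunks of up to 1500
// characters, with a 100-character overlap so context carries across chunks.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1500,
  chunkOverlap: 100,
});

const chunks = await splitter.splitText(longDocumentText);
console.log(`${chunks.length} chunks produced`);
```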
75 |
76 | <details>
77 | <summary>How do you change the models used in this sample?</summary>
78 |
79 | You can use the environment variables to change the chat and embeddings models used in this sample when deployed.
80 | Run these commands:
81 |
82 | ```bash
83 | azd env set AZURE_OPENAI_API_MODEL gpt-4
84 | azd env set AZURE_OPENAI_API_MODEL_VERSION 0125-preview
85 | azd env set AZURE_OPENAI_API_EMBEDDINGS_MODEL text-embedding-3-large
86 | azd env set AZURE_OPENAI_API_EMBEDDINGS_MODEL_VERSION 1
87 | ```
88 |
89 | You may also need to adjust the capacity in the `infra/main.bicep` file, depending on how much TPM (tokens per minute) quota your account is allowed.
90 |
91 | ### Local models
92 |
93 | To change the local models used by Ollama, you can edit the file `packages/api/src/constants.ts`:
94 |
95 | ```typescript
96 | export const ollamaEmbeddingsModel = 'nomic-embed-text:latest';
97 | export const ollamaChatModel = 'llama3.1:latest';
98 | ```
99 |
100 | You can see the complete list of available models at https://ollama.ai/models.
101 |
102 | After changing the models, you also need to fetch the new models by running the command:
103 |
104 | ```bash
105 | ollama pull <model-name>
106 | ```
107 |
108 | </details>
109 |
110 | <details>
111 | <summary>What does the azd up command do?</summary>
112 |
113 | The `azd up` command comes from the [Azure Developer CLI](https://learn.microsoft.com/azure/developer/azure-developer-cli/overview), and takes care of both provisioning the Azure resources and deploying code to the selected Azure hosts.
114 |
115 | The `azd up` command uses the `azure.yaml` file combined with the infrastructure-as-code `.bicep` files in the `infra/` folder. The `azure.yaml` file for this project declares several "hooks" for the `prepackage` and `postprovision` steps. The `up` command first runs the `prepackage` hook, which installs Node dependencies and builds the TypeScript files. It then packages all the code (both frontend and backend services) into a zip file which it will deploy later.
116 |
117 | Next, it provisions the resources based on `main.bicep` and `main.parameters.json`. At that point, since there is no default value for the OpenAI resource location, it asks you to pick a location from a short list of available regions. Then it will send requests to Azure to provision all the required resources. With everything provisioned, it runs the `postprovision` hook to process the local data and add it to an Azure Cosmos DB index.
118 |
119 | Finally, it looks at `azure.yaml` to determine the Azure host (Functions and Static Web Apps, in this case) and uploads the zip to Azure. The `azd up` command is now complete, but it may take some time for the app to be fully available and working after the initial deploy.
120 |
121 | Related commands are `azd provision` for just provisioning (if infra files change) and `azd deploy` for just deploying updated app code.
122 |
123 | </details>
124 |
125 | <details>
126 | <summary>Why use Azure Cosmos DB for vector search? What about Azure AI Search?</summary>
127 |
128 | There are multiple Azure services that implement vector search capabilities, including Azure Cosmos DB. In this sample, we use Azure Cosmos DB for vector search because it's also a regular NoSQL database that can store any of your regular data workloads in addition to the vector search data. This makes it a versatile choice for a wide range of applications, all while keeping costs low by using a serverless tier.
129 |
130 | Azure AI Search is another option for vector search, but it's more focused on search capabilities: it provides more advanced vector search and hybrid search options, though it doesn't provide the same flexibility as Azure Cosmos DB. We also have a version of this sample that uses Azure AI Search, which you can find [here](https://github.com/Azure-Samples/serverless-chat-langchainjs/tree/ai-search).
131 |
132 | For more information about Azure vector search options, you can check out [this architecture guide](https://learn.microsoft.com/azure/architecture/guide/technology-choices/vector-search).
133 |
134 | </details>
135 |
136 |
150 |
-------------------------------------------------------------------------------- /packages/api/src/functions/chats-post.ts: --------------------------------------------------------------------------------
1 | import { Readable } from 'node:stream';
2 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
3 | import { AIChatCompletionRequest, AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';
4 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/openai';
5 | import { Embeddings } from '@langchain/core/embeddings';
6 | import { AzureCosmsosDBNoSQLChatMessageHistory, AzureCosmosDBNoSQLVectorStore } from '@langchain/azure-cosmosdb';
7 | import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
8 | import { BaseChatModel } from '@langchain/core/language_models/chat_models';
9 | import { RunnableWithMessageHistory } from '@langchain/core/runnables';
10 | import { VectorStore } from '@langchain/core/vectorstores';
11 | import { ChatOllama, OllamaEmbeddings } from '@langchain/ollama';
12 | import { FaissStore } from '@langchain/community/vectorstores/faiss';
13 | import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
14 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
15 | import { v4 as uuidv4 } from 'uuid';
16 | import 'dotenv/config';
17 | import { badRequest, data, serviceUnavailable } from '../http-response.js';
18 | import { ollamaChatModel, ollamaEmbeddingsModel, faissStoreFolder } from '../constants.js';
19 | import { getAzureOpenAiTokenProvider, getCredentials, getUserId } from '../security.js';
20 |
21 | const ragSystemPrompt = `Assistant helps the Consto Real Estate company customers with questions and support requests. Be brief in your answers. Answer only plain text, DO NOT use Markdown.
22 | Answer ONLY with information from the sources below. If there isn't enough information in the sources, say you don't know. Do not generate answers that don't use the sources. If asking a clarifying question to the user would help, ask the question.
23 | If the user question is not in English, answer in the language used in the question.
24 |
25 | Each source has the format "[filename]: information". ALWAYS reference the source filename for every part used in the answer. Use the format "[filename]" to reference a source, for example: [info1.txt]. List each source separately, for example: [info1.txt][info2.pdf].
26 |
27 | Generate 3 very brief follow-up questions that the user would likely ask next.
28 | Enclose the follow-up questions in double angle brackets. Example:
29 | <<Am I allowed to invite friends for a party?>>
30 | <<How can I ask for a refund?>>
31 | <<What If I break something?>>
32 |
33 | Do not repeat questions that have already been asked.
34 | Make sure the last question ends with ">>".
35 |
36 | SOURCES:
37 | {context}`;
38 |
39 | const titleSystemPrompt = `Create a title for this chat session, based on the user question. The title should be less than 32 characters.
Do NOT use double-quotes.`;
40 |
41 | export async function postChats(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> {
42 |   const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;
43 |
44 |   try {
45 |     const requestBody = (await request.json()) as AIChatCompletionRequest;
46 |     const { messages, context: chatContext } = requestBody;
47 |     const userId = getUserId(request, requestBody);
48 |
49 |     if (!messages || messages.length === 0 || !messages.at(-1)?.content) {
50 |       return badRequest('Invalid or missing messages in the request body');
51 |     }
52 |
53 |     let embeddings: Embeddings;
54 |     let model: BaseChatModel;
55 |     let store: VectorStore;
56 |     let chatHistory;
57 |     const sessionId = ((chatContext as any)?.sessionId as string) || uuidv4();
58 |     context.log(`userId: ${userId}, sessionId: ${sessionId}`);
59 |
60 |     if (azureOpenAiEndpoint) {
61 |       const credentials = getCredentials();
62 |       const azureADTokenProvider = getAzureOpenAiTokenProvider();
63 |
64 |       // Initialize models and vector database
65 |       embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider });
66 |       model = new AzureChatOpenAI({
67 |         // Controls randomness. 0 = deterministic, 1 = maximum randomness
68 |         temperature: 0.7,
69 |         azureADTokenProvider,
70 |       });
71 |       store = new AzureCosmosDBNoSQLVectorStore(embeddings, { credentials });
72 |
73 |       // Initialize chat history
74 |       chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({
75 |         sessionId,
76 |         userId,
77 |         credentials,
78 |       });
79 |     } else {
80 |       // If no environment variables are set, it means we are running locally
81 |       context.log('No Azure OpenAI endpoint set, using Ollama models and local DB');
82 |       embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel });
83 |       model = new ChatOllama({
84 |         temperature: 0.7,
85 |         model: ollamaChatModel,
86 |       });
87 |       store = await FaissStore.load(faissStoreFolder, embeddings);
88 |       chatHistory = new FileSystemChatMessageHistory({
89 |         sessionId,
90 |         userId,
91 |       });
92 |     }
93 |
94 |     // Create the chain that combines the prompt with the documents
95 |     const ragChain = await createStuffDocumentsChain({
96 |       llm: model,
97 |       prompt: ChatPromptTemplate.fromMessages([
98 |         ['system', ragSystemPrompt],
99 |         ['human', '{input}'],
100 |       ]),
101 |       documentPrompt: PromptTemplate.fromTemplate('[{source}]: {page_content}\n'),
102 |     });
103 |     // Handle chat history
104 |     const ragChainWithHistory = new RunnableWithMessageHistory({
105 |       runnable: ragChain,
106 |       inputMessagesKey: 'input',
107 |       historyMessagesKey: 'chat_history',
108 |       getMessageHistory: async () => chatHistory,
109 |     });
110 |     // Retriever to search for the documents in the database
111 |     const retriever = store.asRetriever(3);
112 |     const question = messages.at(-1)!.content;
113 |     const responseStream = await ragChainWithHistory.stream(
114 |       {
115 |         input: question,
116 |         context: await retriever.invoke(question),
117 |       },
118 |       { configurable: { sessionId } },
119 |     );
120 |     const jsonStream = Readable.from(createJsonStream(responseStream, sessionId));
121 |
122 |     // Create a short title for this chat session
123 |     const { title } = await chatHistory.getContext();
124 |     if (!title) {
125 |       const response = await ChatPromptTemplate.fromMessages([
126 |         ['system', titleSystemPrompt],
127 |         ['human', '{input}'],
128 |       ])
129 |         .pipe(model)
130 |         .invoke({ input: question });
131 |       context.log(`Title for session: ${response.content as string}`);
132 |       chatHistory.setContext({ title: response.content });
133 |     }
134 |
135 |     return
data(jsonStream, {
136 |       'Content-Type': 'application/x-ndjson',
137 |       'Transfer-Encoding': 'chunked',
138 |     });
139 |   } catch (_error: unknown) {
140 |     const error = _error as Error;
141 |     context.error(`Error when processing chat-post request: ${error.message}`);
142 |
143 |     return serviceUnavailable('Service temporarily unavailable. Please try again later.');
144 |   }
145 | }
146 |
147 | // Transform the response chunks into a JSON stream
148 | async function* createJsonStream(chunks: AsyncIterable<string>, sessionId: string) {
149 |   for await (const chunk of chunks) {
150 |     if (!chunk) continue;
151 |
152 |     const responseChunk: AIChatCompletionDelta = {
153 |       delta: {
154 |         content: chunk,
155 |         role: 'assistant',
156 |       },
157 |       context: {
158 |         sessionId,
159 |       },
160 |     };
161 |
162 |     // Format response chunks in Newline delimited JSON
163 |     // see https://github.com/ndjson/ndjson-spec
164 |     yield JSON.stringify(responseChunk) + '\n';
165 |   }
166 | }
167 |
168 | app.setup({ enableHttpStream: true });
169 | app.http('chats-post', {
170 |   route: 'chats/stream',
171 |   methods: ['POST'],
172 |   authLevel: 'anonymous',
173 |   handler: postChats,
174 | });
175 |
-------------------------------------------------------------------------------- /docs/old-tutorial/04-session.md: --------------------------------------------------------------------------------
1 | # Generate completion using `chain` in the `chat` API
2 |
3 | In this session, we will learn how to use a very important feature in LangChain.js: `chain`.
4 |
5 | ## What are `chains`?
6 |
7 | `Chains` in large language models (LLMs), such as GPT (Generative Pre-trained Transformer), refer to a technique where the outputs of a previous interaction are used as inputs for the next interaction with the model. This allows for a continuous and coherent conversation or thought process, where each new response takes into account the previous context, creating a "chain" of connected interactions.
8 |
9 | For example, if you ask the model about the recipe for a cake and then want to know how to change that recipe to make it vegan, the model will use the information from the previous conversation about the cake recipe to provide a relevant and specific answer about how to make the vegan version, rather than starting from scratch. This is useful for maintaining continuity and relevance in conversations or the processing of sequential information.
10 |
11 | And, of course, **[LangChain.js supports this functionality](https://js.langchain.com/docs/expression_language/streaming#chains)**. Let's see how we can use it.
12 |
13 | ## Integrating `ChatPromptTemplate` for dynamic interactions
14 |
15 | Open the `chat.ts` file and let's make some significant changes to this code.
16 |
17 | - `packages/api/functions/chat.ts`:
18 |
19 | ```typescript
20 | import { app, HttpRequest, HttpResponseInit, InvocationContext } from '@azure/functions';
21 | import { badRequest, serviceUnavailable } from '../utils';
22 | import { AzureChatOpenAI, AzureOpenAIEmbeddings } from '@langchain/azure-openai';
23 | import { ChatPromptTemplate } from '@langchain/core/prompts';
24 |
25 | import 'dotenv/config';
26 |
27 | export async function chat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> {
28 |   try {
29 |     const requestBody: any = await request.json();
30 |
31 |     if (!requestBody?.question) {
32 |       return badRequest(new Error('No question provided'));
33 |     }
34 |
35 |     const { question } = requestBody;
36 |
37 |     const embeddings = new AzureOpenAIEmbeddings();
38 |
39 |     const model = new AzureChatOpenAI();
40 |
41 |     const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
42 |       ['system', "Answer the user's questions based on the below context:\n\n{context}"],
43 |       ['human', '{input}'],
44 |     ]);
45 |
46 |     return {
47 |       status: 200,
48 |       body: 'Testing chat function.',
49 |     };
50 |   } catch (error: unknown) {
51 |     const error_ = error as Error;
52 |     context.error(`Error when processing request: ${error_.message}`);
53 |
54 |     return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
55 |   }
56 | }
57 |
58 | app.http('chat', {
59 |   route: 'chat',
60 |   methods: ['POST'],
61 |   authLevel: 'anonymous',
62 |   handler: chat,
63 | });
64 | ```
65 |
66 | Let's understand what we did here:
67 |
68 | First, we import the `ChatPromptTemplate` class from the `@langchain/core/prompts` package to create a chat prompt. This class is used to create a chat prompt that can be used to interact with the language model.
69 |
70 | Next, we create a conversation prompt called `questionAnsweringPrompt` that will be used to create a conversation with the language model.
71 |
72 | You will notice that right after we use the `ChatPromptTemplate` class, we are using the `fromMessages` method. This method exists precisely to create a chat prompt template from individual chat messages or message-like tuples, which is why it takes an array as its argument. So much so that, if you hit `CTRL + SPACE` in your code editor, you will see the following options below:
73 |
74 | ![fromMessages method](./images/from-messages-method.png)
75 |
76 | In this case, we put as the first parameter the type of message we are sending, which is `system` and `human`. The second parameter is the message we are sending. In this case, the system message is `Answer the user's questions based on the below context:\n\n{context}` and the user message is `{input}`.
77 |
78 | ## Implementing a `chain` for the `chat` API
79 |
80 | Now that we've created a more dynamic chat, let's implement the `chain` so that the conversation is more fluid and coherent. To do this, add the following code right after creating the `questionAnsweringPrompt`:
81 |
82 | ```typescript
83 | // (... the previous code, plus imports for createStuffDocumentsChain, AzureCosmosDBVectorStore, createRetrievalChain and ok ...)
84 |
85 | const combineDocsChain = await createStuffDocumentsChain({
86 |   llm: model,
87 |   prompt: questionAnsweringPrompt,
88 | });
89 |
90 | const store = new AzureCosmosDBVectorStore(embeddings, {});
91 |
92 | const chain = await createRetrievalChain({
93 |   retriever: store.asRetriever(),
94 |   combineDocsChain,
95 | });
96 |
97 | const response = await chain.invoke({
98 |   input: question
99 | });
100 |
101 | return response
102 |   ?
ok({ response })
103 |   : serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
104 | } catch (error: unknown) {
105 |   const error_ = error as Error;
106 |   context.error(`Error when processing chat request: ${error_.message}`);
107 |
108 |   return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.'));
109 | };
110 |
111 | app.http('chat', {
112 |   route: 'chat',
113 |   methods: ['POST'],
114 |   authLevel: 'anonymous',
115 |   handler: chat,
116 | });
117 | ```
118 |
119 | Let's understand again in each line what we did:
120 |
121 | We created a `combineDocsChain` using the `createStuffDocumentsChain` function.
122 |
123 | This function is used to create a chain that passes a list of documents to a prompt template. There are a few parameters in this function. These include `llm`, which is the language model we are using, and `prompt`, which is the conversation we are having with the model. We will use them to create the chain.
124 |
125 | Just as we did in the `upload` API, we will need to store the vectors in the database. To do this, we created a variable called `store` so that we can instantiate the `AzureCosmosDBVectorStore` class. This class is used to create a vector store that can store and retrieve embedding vectors from the database.
126 |
127 | We create the `chain` using the `createRetrievalChain` function. This function is used precisely to create a retrieval chain that will retrieve the documents and then pass them on to the chat. That's why this function has two parameters:
128 |
129 | - `retriever`: which aims to return a list of documents.
130 | - `combineDocsChain`: which produces a string output.
131 |
132 | Finally, we invoked the `chain` using the `invoke` method. This method is used to invoke the chain with the input question and get the response from the language model.
133 |
134 | Wow! We have completed our `chat` API. Now, let's test our API together with the `upload` API.
135 |
136 | ## Testing the `chat` API
137 |
138 | To test the two APIs, let's open the terminal again and run the following command inside the `packages/api` folder:
139 |
140 | ```bash
141 | npm run start
142 | ```
143 |
144 | The message related to the `chat` and `upload` API will appear again. Open a new terminal and include the following command:
145 |
146 | ```bash
147 | curl -F "file=@data/support.pdf" http://localhost:7071/api/upload
148 | ```
149 |
150 | If all goes well, you will see the following message:
151 |
152 | ```json
153 | {
154 |   "message": "PDF file uploaded successfully."
155 | }
156 | ```
157 |
158 | Now, let's test the `chat` API. To do this, go to the `api.http` file. We'll be using the `REST Client` extension to test the `chat` API. When it opens, send the request and see the result.
159 |
160 | ![chat API](./images/chat-api.png)
161 |
162 | You will see the exact response requested in the `chat` request. If you want to see the whole process, take a look at the gif below:
163 |
164 | ![chat API](./images/chat-final-result.gif)
165 |
166 | We haven't finished our project yet. We still have one more very important item that we mustn't forget to implement in a chat: `stream` response. We'll learn how to do this in the next session.
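If you prefer the terminal over the `REST Client` extension, an equivalent request can be sent with `curl`; the handler above reads a `question` field from the JSON body, and the question text here is just an example:

```bash
curl -X POST http://localhost:7071/api/chat \
  -H "Content-Type: application/json" \
  -d '{"question": "How do I contact customer support?"}'
```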
167 |
168 | ▶ **[Next Step: Generate `stream` response](./05-session.md)**
169 |
-------------------------------------------------------------------------------- /docs/tutorial/01-introduction.md: --------------------------------------------------------------------------------
1 | # Tutorial - Create a Serverless AI Chat with RAG using LangChain.js and TypeScript
2 |
3 | Welcome to the tutorial _Create a Serverless AI Chat with RAG using LangChain.js and TypeScript_.
4 |
5 | This tutorial will guide you through creating a serverless AI chat and RAG (Retrieval-Augmented Generation) application using LangChain.js, Azure Functions, Azure Cosmos DB for MongoDB vCore, Azure Blob Storage, and Azure Static Web Apps.
6 |
7 | The chatbot you're building can answer questions based on a set of enterprise documents uploaded from a fictional company called _Contoso Real Estate_.
8 |
9 | Here's an example of the application in action:
10 |
11 | ![AI Chat with RAG](../../docs/images/demo.gif)
12 |
13 | This tutorial will teach you how to build a serverless application using Azure Functions and LangChain.js.
14 |
15 | LangChain.js is a library for building AI apps. It integrates large language models (LLMs) such as GPT, Claude 2, and more, and makes it easy to develop AI-driven chatbots. Next, you'll learn how to set up the environment and deploy the application.
16 |
17 | The front end of the application is provided so that you can focus on the backend code and technologies.
18 |
19 | ## Prerequisites
20 |
21 | You can run the application in the tutorial using one of the following options:
22 |
23 | - Run the application locally on your machine.
24 | - Run the application using Codespaces.
25 |
26 | ### Run using Codespaces
27 |
28 | > It is highly recommended to use Codespaces for this tutorial. Codespaces is a cloud-based tool that enables you to run development environments without installing any tools on your computer. This way, you can focus on the development process without worrying about the environment setup.
29 |
30 | If you decide to continue using **[Codespaces](https://github.com/features/codespaces)**, you can follow the steps described in the README.md file at the root of the project.
31 |
32 | > **Note**: If you are using Codespaces, you don't need to install any of the tools listed in the next section. Codespaces already has all the necessary tools installed. Codespaces can be used for free for up to 60 hours per month, and this is renewed every month.
33 |
34 | ### Run Locally
35 |
36 | If you choose to use a local environment, you'll need to install:
37 |
38 | - [Node.js](https://nodejs.org/en/download/)
39 | - [TypeScript](https://www.typescriptlang.org/download)
40 | - [Visual Studio Code](https://code.visualstudio.com/download)
41 | - [Azure Functions Core Tools](https://docs.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=windows%2Ccsharp%2Cbash)
42 | - [Git](https://git-scm.com/downloads)
43 | - [Azure Developer CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)
44 |
45 | > If you are a Windows user, you'll need to install [PowerShell](https://learn.microsoft.com/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.4), [Git Bash](https://git-scm.com/downloads) or [WSL2](https://learn.microsoft.com/windows/wsl/install) to run the bash commands.
46 |
47 | ## Project Overview
48 |
49 | Building AI applications can be complex and time-consuming.
Using LangChain.js together with serverless technologies such as Azure Functions, you can greatly simplify the process. These tools streamline development by managing infrastructure concerns and scaling automatically, allowing you to focus more on building the chatbot functionality and less on the underlying system architecture. This application is a chatbot that uses a set of enterprise documents to generate AI responses to user queries.
50 |
51 | The code sample includes sample data to make trying the application quick and easy, but feel free to replace it with your own. You'll use a fictitious company called Contoso Real Estate, and the experience allows its customers to ask support questions about the usage of the company's products. The sample data includes a set of documents that describes the company's terms of service, privacy policy, and support guide.
52 |
53 | ## Understanding the project architecture
54 |
55 | The architecture of the project is shown in the following diagram:
56 |
57 | ![AI Chat with RAG](../../docs/images/architecture.drawio.png)
58 |
59 | To understand the architecture of the project, let's break it down into its individual components:
60 |
61 | 1. **Web App:**
62 |
63 |    - The user interface for the chatbot is a web application built with **[Lit](https://lit.dev/)** (a library for building web components) and hosted using **[Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview)**. It provides a chat interface that users can use to ask questions.
64 |    - The code is in the `packages/webapp` folder.
65 |
66 | 2. **Serverless API:**
67 |
68 |    - When a user sends a query through the web app, it is sent via HTTP to an API built using Azure Functions.
69 |    - The API uses LangChain.js to process the query.
70 |    - The API manages the logic of corporate documents and responds with answers to chat queries.
71 |    - The code for this functionality will be shown later in the tutorial and is in the `packages/api` folder.
72 |
73 | 3. **Database:**
74 |
75 |    - Text extracted from the documents and the vectors generated by LangChain.js are stored in Azure Cosmos DB for MongoDB vCore.
76 |    - The database allows for the storage and retrieval of text chunks using vector search, which enables quick and relevant responses based on the user's queries.
77 |
78 | 4. **File Storage:**
79 |
80 |    - The source documents, such as terms of service, privacy policy, and support guides for Contoso Real Estate, are stored in Azure Blob Storage. This is where the PDF documents are uploaded and retrieved from.
81 |
82 | 5. **Azure OpenAI Service:**
83 |
84 |    - This service is where the AI model (a Large Language Model, or LLM) is hosted. The model can understand and generate natural language. This is used to embed text chunks or generate answers based on the vector search from the database.
85 |
86 | Let's examine the application flow based on the architecture diagram:
87 |
88 | - A user interacts with the chat interface in the web app.
89 | - The web app sends the user's query to the Serverless API via HTTP calls.
90 | - The Serverless API interacts with Azure OpenAI Service to generate a response, using the data from Azure Cosmos DB for MongoDB vCore.
91 | - If there's a need to reference the documents, Azure Blob Storage is used to retrieve the PDF documents.
92 | - The generated response is then sent back to the web app and displayed to the user.
93 |
94 | The architecture is based on the RAG (Retrieval-Augmented Generation) architecture.
This architecture combines the ability to retrieve information from a database with the ability to generate text from a language model. You'll learn more about RAG later in the tutorial.
95 |
96 | ## Executing the Project
97 |
98 | Now that you understand the project's architecture, let's run it!
99 |
100 | Once you have `forked` and `cloned` the project, use the `starter` branch to continue with the tutorial. The `main` branch has the finished project if you wish to view it!
101 |
102 | To execute the project, follow these steps:
103 |
104 | 1. Install the project dependencies:
105 |
106 |    ```bash
107 |    npm install
108 |    ```
109 |
110 | 2. To run only the front end of the project, execute the following command:
111 |
112 |    ```bash
113 |    npm run start:webapp
114 |    ```
115 |
116 |    > At this point, do not worry about the other scripts in the `package.json` file at the root of the project. They will be used throughout the tutorial.
117 |
118 | 3. Open your browser and go to `http://localhost:8000`. The application will be displayed, as shown in the image below:
119 |
120 | ![FrontEnd application](./images/application-webapp.png)
121 |
122 | ## Next Steps
123 |
124 | Here are some additional resources for you to delve into:
125 |
126 | - **[Azure Functions Documentation](https://learn.microsoft.com/azure/azure-functions/)**
127 | - **[Azure Cosmos DB for MongoDB vCore Documentation](https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/)**
128 | - **[Azure Blob Storage Documentation](https://learn.microsoft.com/azure/storage/blobs/)**
129 | - **[Azure Static Web Apps Documentation](https://learn.microsoft.com/azure/static-web-apps/)**
130 | - **[LangChain.js Documentation](https://js.langchain.com/docs/get_started/introduction)**
131 | - **[OpenAI API Documentation](https://platform.openai.com/docs/introduction)**
132 | - **[Lit Documentation](https://lit.dev/)**
133 | - **[TypeScript Documentation](https://www.typescriptlang.org/docs/)**
134 | - **[Node.js Documentation](https://nodejs.org/en/docs/)**
135 | - **[Visual Studio Code Documentation](https://code.visualstudio.com/docs)**
136 | - **[Git Documentation](https://git-scm.com/doc)**
137 | - **[Azure Developer CLI Documentation](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)**
138 | - **[PowerShell Documentation](https://learn.microsoft.com/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.4)**
139 |
140 | In the next section, we will start to create the API using Azure Functions. See you there!
141 |
142 | **[Next Step: Setting Up the Serverless Environment using Azure Functions ➡️](./02-setting-up-azure-functions.md)**
143 |
-------------------------------------------------------------------------------- /docs/readme.md: --------------------------------------------------------------------------------
1 | ---
2 | page_type: sample
3 | languages:
4 | - azdeveloper
5 | - javascript
6 | - typescript
7 | - nodejs
8 | - bicep
9 | products:
10 | - azure
11 | - azure-openai
12 | - ai-services
13 | urlFragment: serverless-chat-langchainjs
14 | name: Serverless AI Chat with RAG using LangChain.js
15 | description: Build your own serverless AI Chat with Retrieval-Augmented-Generation using LangChain.js, TypeScript and Azure.
16 | ---
17 |
18 |
19 |
20 | This sample shows how to build a serverless AI chat experience with Retrieval-Augmented Generation using [LangChain.js](https://js.langchain.com/) and Azure.
The application is hosted on [Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview) and [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript), with [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search) as the vector database. You can use it as a starting point for building more complex AI applications.
21 |
22 | ![Animation showing the chat app in action](./images/demo.gif)
23 |
24 | ## Overview
25 |
26 | Building AI applications can be complex and time-consuming, but using LangChain.js and Azure serverless technologies allows you to greatly simplify the process. This application is a chatbot that uses a set of enterprise documents to generate responses to user queries.
27 |
28 | We provide sample data to make this sample ready to try, but feel free to replace it with your own. We use a fictitious company called _Contoso Real Estate_, and the experience allows its customers to ask support questions about the usage of its products. The sample data includes a set of documents that describes its terms of service, privacy policy and a support guide.
29 |
30 | ![Application architecture](./images/architecture.drawio.png)
31 |
32 | This application is made from multiple components:
33 |
34 | - A web app made with a single chat web component built with [Lit](https://lit.dev) and hosted on [Azure Static Web Apps](https://learn.microsoft.com/azure/static-web-apps/overview). The code is located in the `packages/webapp` folder.
35 |
36 | - A serverless API built with [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) and using [LangChain.js](https://js.langchain.com/) to ingest the documents and generate responses to the user chat queries. The code is located in the `packages/api` folder.
37 |
38 | - A database to store chat sessions and the text extracted from the documents and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).
39 |
40 | - File storage for the source documents, using [Azure Blob Storage](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction).
41 |
42 | ## Prerequisites
43 |
44 | - [Node.js LTS](https://nodejs.org/download/)
45 | - [Azure Developer CLI](https://aka.ms/azure-dev/install)
46 | - [Git](https://git-scm.com/downloads)
47 | - Azure account. If you're new to Azure, [get an Azure account for free](https://azure.microsoft.com/free) to get free Azure credits to get started. If you're a student, you can also get free credits with [Azure for Students](https://aka.ms/azureforstudents).
48 | - Azure subscription with access enabled for the Azure OpenAI service. You can request access with [this form](https://aka.ms/oaiapply).
49 | - Azure account permissions:
50 |   - Your Azure account must have `Microsoft.Authorization/roleAssignments/write` permissions, such as [Role Based Access Control Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview), [User Access Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#user-access-administrator), or [Owner](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#owner).
If you don't have subscription-level permissions, you must be granted [RBAC](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview) for an existing resource group and [deploy to that existing group](docs/deploy_existing.md#resource-group). 51 | - Your Azure account also needs `Microsoft.Resources/deployments/write` permissions on the subscription level. 52 | 53 | ## Set up the sample 54 | 55 | You can run this project directly in your browser by using GitHub Codespaces, which will open a web-based VS Code. 56 | 57 | 1. [**Fork**](https://github.com/Azure-Samples/serverless-chat-langchainjs/fork) the project to create your own copy of this repository. 58 | 2. On your forked repository, select the **Code** button, then the **Codespaces** tab, and click the **Create codespace on main** button. 59 | ![Screenshot showing how to create a new codespace](./images/codespaces.png) 60 | 3. Wait for the Codespace to be created; it should take a few minutes. 61 | 62 | ## Deploy on Azure 63 | 64 | 1. Open a terminal at the root of the project. 65 | 2. Authenticate with Azure by running `azd auth login`. 66 | 3. Run `azd up` to deploy the application to Azure. This will provision Azure resources, deploy this sample, and build the search index based on the files found in the `./data` folder. 67 | - You will be prompted to select a base location for the resources. If you're unsure of which location to choose, select `eastus2`. 68 | - By default, the OpenAI resource will be deployed to `eastus2`. You can set a different location with `azd env set AZURE_OPENAI_RESOURCE_GROUP_LOCATION <location>`. Currently only a short list of locations is accepted. That location list is based on the [OpenAI model availability table](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability) and may become outdated as availability changes. 69 | 70 | The deployment process will take a few minutes. Once it's done, you'll see the URL of the web app in the terminal. 71 | 72 | ![Screenshot of the azd up command result](./images/azd-up.png) 73 | 74 | You can now open the web app in your browser and start chatting with the bot. 75 | 76 | ## Enable CI/CD (Optional) 77 | 78 | If you want to enable Continuous Deployment for your forked repository, you need to configure the Azure pipeline first: 79 | 80 | 1. Open a terminal at the root of your forked project. 81 | 2. Authenticate with Azure by running `azd auth login`. 82 | 3. Run `azd pipeline config` to configure the required secrets and variables for connecting to Azure from GitHub Actions. 83 | - This command will set up the necessary Azure service principal and configure GitHub repository secrets. 84 | - Follow the prompts to complete the configuration. 85 | 86 | Once configured, the GitHub Actions workflow will automatically deploy your application to Azure whenever you push changes to the main branch. 87 | 88 | ## Key concepts 89 | 90 | Our API is composed of two main endpoints: 91 | 92 | - `POST /documents`: This endpoint allows you to upload PDF documents into the database. Using LangChain.js, we extract the text from the PDF file, split it into smaller chunks, and generate vectors for each chunk. We store the text and the vectors in the database for later use. 93 | 94 | - `POST /chats`: This endpoint receives a list of messages, the last one being the user query, and returns a response generated by the LLM.
It uses the documents stored in the database to generate the response. We use LangChain.js components to connect to the database, load the documents, and perform a vector search after vectorizing the user query. After that, the most relevant documents are injected into the prompt, and we generate the response. While this process seems complex, LangChain.js does all the heavy lifting for us so we can focus on the application flow. 95 | 96 | The `/documents` endpoint is used to ingest the documents after the application is deployed: you upload the PDFs using either `curl` commands or the Node.js script we built (have a look at the `postup` hook in the `azure.yaml` file). 97 | 98 | The web app is a simple chat interface that sends the user queries to the `/chats` endpoint and displays the responses. 99 | We use the [HTTP protocol for AI chat apps](https://aka.ms/chatprotocol) to communicate between the web app and the API. 100 | 101 | ## Clean up 102 | 103 | To clean up all the Azure resources created by this sample: 104 | 105 | 1. Run `azd down --purge` 106 | 2. When asked if you are sure you want to continue, enter `y` 107 | 108 | The resource group and all the resources will be deleted. 109 | 110 | ## Troubleshooting 111 | 112 | If you have any issue when running or deploying this sample, please check the [troubleshooting guide](./troubleshooting.md). If you can't find a solution to your problem, please [open an issue](https://github.com/Azure-Samples/serverless-chat-langchainjs/issues) in this repository. 113 | 114 | ## Next steps 115 | 116 | Here are some resources to learn more about the technologies used in this sample: 117 | 118 | - [LangChain.js documentation](https://js.langchain.com) 119 | - [Generative AI with JavaScript](https://github.com/microsoft/generative-ai-with-javascript) 120 | - [Generative AI For Beginners](https://github.com/microsoft/generative-ai-for-beginners) 121 | - [Azure OpenAI Service](https://learn.microsoft.com/azure/ai-services/openai/overview) 122 | - [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/) 123 | - [Ask YouTube: LangChain.js + Azure Quickstart sample](https://github.com/Azure-Samples/langchainjs-quickstart-demo) 124 | - [Chat + Enterprise data with Azure OpenAI and Azure AI Search](https://github.com/Azure-Samples/azure-search-openai-javascript) 125 | - [Revolutionize your Enterprise Data with Chat: Next-gen Apps w/ Azure OpenAI and AI Search](https://aka.ms/entgptsearchblog) 126 | 127 | You can also find [more Azure AI samples here](https://github.com/Azure-Samples/azureai-samples). 128 | -------------------------------------------------------------------------------- /docs/tutorial/04-preparing-understanding-language-models.md: -------------------------------------------------------------------------------- 1 | # Preparing and Understanding Language Models: Configuring Azure OpenAI Service and Installing Ollama with Llama3.1 8B 2 | 3 | In this section, we will cover the language models used in the project. Throughout the tutorial, we will also learn how to generate the environment variables needed to use the Azure services, including the **[Azure OpenAI Service](https://learn.microsoft.com/azure/ai-services/openai/overview)**. 4 | 5 | We will also teach you how to use **[Ollama](https://ollama.com/)** with **[Llama3.1 8B](https://www.llama.com/)**, an open-source language model, if you want to run a model locally.
6 | 7 | ## Models to be used in the project 8 | 9 | We will teach you how to use two different language models: GPT-3.5 Turbo integrated with _Azure OpenAI Service_ (on Azure) and _Ollama with Llama3.1 8B_ (if you decide to use a model locally). Let's take a look at each of them. 10 | 11 | ### GPT-3.5 Turbo Integrated with Azure OpenAI Service 12 | 13 | ![Azure OpenAI Service Page](./images/azure-openai-page.png) 14 | 15 | OpenAI has developed GPT-3.5 Turbo, an improved version of the already impressive GPT-3.5. This model provides faster and more accurate responses, making it a reliable tool for companies and developers who need to generate text or perform other tasks related to Natural Language Processing (NLP). 16 | 17 | You have the choice to use either **[OpenAI Service](https://openai.com/)** or **[Azure OpenAI Service](https://azure.microsoft.com/products/ai-services/openai-service)**. For this tutorial, we will be using Azure OpenAI Service, a version of OpenAI Service hosted on the Azure platform. 18 | 19 | Azure OpenAI Service provides REST API access from many programming languages, including Python, Node.js, and C#. Additionally, it offers advanced language models like GPT-4 and GPT-4 Turbo with Vision, which are versatile and adaptable to various tasks such as content generation, summarization, image recognition, semantic search, and text-to-code translation. 20 | 21 | ### Ollama with Llama3.1 8B 22 | 23 | ![Ollama Page](./images/ollama-page.png) 24 | 25 | **[Ollama](https://ollama.com/)** presents itself as an open-source solution, offering a transparent and modifiable platform. Llama3.1 8B has 8 billion parameters and is designed to be effective, cost-efficient, and scalable. 26 | 27 | Ollama's openness encourages innovation and collaboration within the developer community. Users can adapt the model to their specific needs, experiment with innovative ideas, or integrate the model in ways that proprietary services might not allow. 28 | 29 | Additionally, using an open-source language model can decrease expenses, which is a crucial factor for projects with restricted budgets or for those who only wish to experiment with language models. 30 | 31 | ![Llama3.1 8B Page](./images/mistral-7b-page.png) 32 | 33 | ## Creating Azure resources 34 | 35 | To use the Azure OpenAI Service, you need an Azure account. If you don't have one, you can create one for free [here](https://azure.microsoft.com/pt-br/free/). 36 | 37 | > **Note:** If you are a student, you can get free credits for Azure through Microsoft Azure for Students. 38 | 39 | > **Note:** If you decide to use the Azure OpenAI Service, you must fill out a request form to access the service. You can request access to the service by filling out the form [here](https://aka.ms/oaiapply). 40 | 41 | After creating your Azure account and being approved for the Azure OpenAI Service, we will continue as follows: 42 | 43 | > **Note:** Instead of PowerShell, you can also use Git Bash or WSL to run the Azure Developer CLI commands.
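Before you follow the steps in the table below, it helps to see the whole flow at once. Here is roughly what the terminal session looks like (a sketch: the location value is only an example, and `azd` will prompt you interactively for anything missing):

```bash
# Authenticate with Azure
azd auth login

# Optional: pin the OpenAI resource location before provisioning
azd env set AZURE_OPENAI_RESOURCE_GROUP_LOCATION eastus2

# Provision the Azure resources and deploy the application
azd up
```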
44 | 45 | | Step | Description | 46 | | ---- | ----------- | 47 | | 1 | Return to the `main` branch of the project repository. | 48 | | 2 | Open a terminal at the root of the project. | 49 | | 3 | To deploy the application to Azure, run the command `azd up`. This will provision Azure resources, deploy the sample, and build the search index based on the files found in the **./data** folder. | 50 | | 4 | You will be prompted to select a base location for the resources. If you don't know which one to choose, you can select **eastus2**. | 51 | | 5 | By default, the OpenAI resource will be deployed to **eastus2**. You can set a different location with `azd env set AZURE_OPENAI_RESOURCE_GROUP_LOCATION <location>`. Currently only a brief list of locations is accepted. That location list is based on the **[OpenAI model availability table](https://learn.microsoft.com/pt-br/azure/ai-services/openai/concepts/models#standard-deployment-model-availability)** and may become outdated as availability changes. | 52 | 53 | The deployment process will only take a few minutes. Afterward, the URL of the web app will appear in the terminal. 54 | 55 | ![Deployed application](./images/deployed-app.png) 56 | 57 | Open the link for the web app in your browser and start chatting with the bot. 58 | 59 | To check the resources created, go to the Azure portal and look for a resource group containing the following resources: 60 | 61 | ![Azure Services for the Application](./images/services-azure-portal.png) 62 | 63 | The templates used to deploy the resources can be found in the `infra` folder, where we used Infrastructure as Code to set up the resources. 64 | 65 | > **Note:** If you want to simply browse the project code and see it in action, go to the `main` branch where the entire application is ready and follow the steps described in the article [Build a serverless AI Chat with RAG using LangChain.js](https://techcommunity.microsoft.com/t5/apps-on-azure-blog/build-a-serverless-chatgpt-with-rag-using-langchain-js/ba-p/4111041), written by **[Yohan Lasorsa](https://twitter.com/sinedied)**. 66 | 67 | ## Installing Ollama and Local Models 68 | 69 | Before installing Ollama, please ensure you meet the prerequisites, which include sufficient free space, recommended amounts of RAM, and a fast CPU or GPU. For more details about running LLMs locally, see **[here](open-webui/open-webui#736)**. 70 | 71 | #### Memory requirements 72 | 73 | - _7b models generally require at least 8GB of RAM_ 74 | - _13b models generally require at least 16GB of RAM_ 75 | - _70b models generally require at least 64GB of RAM_ 76 | 77 | > **Note:** If you encounter issues with higher quantization levels, consider using the q4 model or close any other memory-intensive programs. 78 | 79 | > **Note:** Ollama supports various operating systems such as Linux, macOS, and Windows. For installation details, visit the official project documentation **[here](https://ollama.com/download)**. 80 | 81 | > **Note:** Ollama cannot be used in Codespaces. It must be installed on a local machine for use.
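Once Ollama is installed, you can confirm that the CLI is available and see which models are already on your machine (assuming `ollama` is on your `PATH`):

```bash
# Print the installed Ollama version
ollama --version

# List the models currently downloaded on this machine
ollama list
```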
82 | 83 | To begin, download the necessary models for this project by running the following commands in your terminal: 84 | 85 | ```bash 86 | ollama pull llama3.1:latest 87 | ollama pull nomic-embed-text:latest 88 | ``` 89 | 90 | We will use the Llama3.1 8B model, a powerful language model, and the nomic-embed-text model, a small embedding model, to generate vectors from the text for the chatbot. 91 | 92 | > **Note:** The Llama3.1 model will download several gigabytes of data, so the process may take some time depending on your internet connection. 93 | 94 | After downloading the models, you can verify the proper functioning of the Ollama server by executing the following command: 95 | 96 | ```bash 97 | ollama run llama3.1:latest 98 | ``` 99 | 100 | A prompt will be displayed in your terminal, allowing you to communicate directly with the AI model in a chat-like format. 101 | 102 | ![Ollama Llama3.1 8B](./images/ollama-mistra-cli.png) 103 | 104 | Ask the model some questions and watch its answers. This will provide insight into the model's capabilities and how to interact with it. 105 | 106 | When you finish testing the Ollama server, you can stop it by pressing **Ctrl+D** in your terminal. 107 | 108 | ## Next Steps 109 | 110 | This section covered the language models that will be used in the project. Choose the model that best suits your needs. To use the Azure OpenAI Service, follow the instructions to set up the service in Azure. To use Ollama with Llama3.1 8B, follow the instructions to install Ollama and the local models. 111 | 112 | To begin developing the application, we first need to create some configuration files for the project. We'll cover this in the next section! 113 | 114 | **[⬅️ Back: Understanding the RAG (Retrieval Augmented Generation) architecture](03-understanding-rag.md)** | **[Next: Developing the `chat` API ➡️ ](./05-config-files-app.md)** 115 | -------------------------------------------------------------------------------- /docs/old-tutorial/01-session.md: -------------------------------------------------------------------------------- 1 | # Session 01: Creating a function with Azure OpenAI, LangChain and Azure Functions 2 | 3 | **[Article - Step-by-Step Guide: Migrating from the v3 to the v4 Programming Model of Azure Functions](https://techcommunity.microsoft.com/t5/educator-developer-blog/step-by-step-guide-migrating-v3-to-v4-programming-model-for/ba-p/3897691)** 4 | 5 | ## Overview 6 | 7 | In this tutorial, we will create a function that uses Azure OpenAI to answer questions. To do this, we will use the `@langchain/azure-openai` and `langchain` packages. Then, we will use the Azure Functions v4 programming model to create the function. 8 | 9 | ## Install the Azure OpenAI SDK package and LangChain.js 10 | 11 | Now that we have the initial structure of the project, let's install the Azure OpenAI SDK package. To do this, type the following command in the terminal: 12 | 13 | - `packages/api` 14 | 15 | ```bash 16 | npm install -S @langchain/azure-openai 17 | ``` 18 | 19 | The package above depends on the installation of `langchain`.
If you haven't installed it yet, run the following command: 20 | 21 | ```bash 22 | npm install -S langchain 23 | ``` 24 | 25 | ## Configure access credentials 26 | 27 | As we will need to include the access credentials to Azure OpenAI, let's create a `.env` file at the root of the project with the following variables: 28 | 29 | - `.env` 30 | 31 | ```env 32 | AZURE_OPENAI_API_ENDPOINT="" 33 | AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME="" 34 | AZURE_OPENAI_API_KEY="" 35 | ``` 36 | 37 | Here is what each variable is for: `AZURE_OPENAI_API_ENDPOINT` is the endpoint URL of your Azure OpenAI resource, `AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME` is the name of the embeddings model deployment you created in that resource, and `AZURE_OPENAI_API_KEY` is the API key used to authenticate requests against it. 38 | 39 | > You can get these credentials in the Azure portal. If you don't have an account, you can create one for free. **[Link](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal)** 40 | 41 | We will need to install the `dotenv` package to load the environment variables. Run the following command: 42 | 43 | ```bash 44 | npm install -S dotenv 45 | ``` 46 | 47 | ## Create `Embeddings` with Azure OpenAI in the `Chat.ts` function 48 | 49 | Now that we have the Azure OpenAI SDK package installed and the access credentials configured, open the `functions/chat.ts` file and inside the `chat` function add the following code: 50 | 51 | - `packages/api/src/functions/chat.ts` 52 | 53 | ```typescript 54 | import { HttpRequest, HttpResponseInit, InvocationContext } from '@azure/functions'; 55 | import { AzureOpenAIEmbeddings } from '@langchain/azure-openai'; 56 | import 'dotenv/config'; 57 | 58 | export async function chat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> { 59 | context.log(`Http function processed request for url "${request.url}"`); 60 | 61 | try { 62 | const requestBody: any = await request.json(); 63 | 64 | if (!requestBody?.question) { 65 | return { 66 | status: 400, 67 | jsonBody: { 68 | error: 'No question provided', 69 | }, 70 | }; 71 | } 72 | 73 | const { question } = requestBody; 74 | 75 | const embeddings = new AzureOpenAIEmbeddings(); 76 | 77 | const prompt = `Question: ${question}`; 78 | context.log(`Sending prompt to the model: ${prompt}`); 79 | 80 | const promptResponse = await embeddings.embedQuery(prompt); 81 | 82 | if (promptResponse) { 83 | return { 84 | status: 200, 85 | jsonBody: { 86 | promptResponse, 87 | }, 88 | }; 89 | } else { 90 | return { 91 | status: 503, 92 | jsonBody: { 93 | error: 'Service temporarily unavailable. Please try again later.', 94 | }, 95 | }; 96 | } 97 | } catch (error: unknown) { 98 | const err = error as Error; 99 | context.error(`Error when processing chat request: ${err.message}`); 100 | 101 | return { 102 | status: 503, 103 | jsonBody: { 104 | error: 'Service temporarily unavailable. Please try again later.', 105 | }, 106 | }; 107 | } 108 | } 109 | ``` 110 | 111 | Let's understand what we did: 112 | 113 | 1. We import the `AzureOpenAIEmbeddings` class from the `@langchain/azure-openai` package and the `dotenv` package to load the environment variables. 114 | 115 | 2. As this is a `POST` request, we need to check if the request body contains the `question` key. If it does not, we return a 400 error. 116 | 117 | 3. We instantiate the `AzureOpenAIEmbeddings` class, which reads its configuration from the environment variables defined in the `.env` file. 118 | 119 | 4. We create a prompt with the question received in the request. The prompt is a string that contains the question and the keyword `Question:`. This is necessary for the Azure OpenAI model to understand that we are asking a question. 120 | 121 | 5. 
We send the prompt to the Azure OpenAI model and return the response. 122 | 123 | This code will need to be refactored later. But let's first focus on testing the `chat` function locally. 124 | 125 | ## Testing the `Chat` API 126 | 127 | Before testing the `chat` function, let's create a file called `api.http` at the root of the project with the following content: 128 | 129 | - `packages/api/api.http` 130 | 131 | ```http 132 | ################################################################## 133 | # VS Code with REST Client extension is needed to use this file. 134 | # Download at: https://aka.ms/vscode/rest-client 135 | ################################################################## 136 | 137 | @api_host = http://localhost:7071 138 | 139 | # Chat with a bot (this is a sample ---> it will be changed later) 140 | POST {{api_host}}/api/chat 141 | Content-Type: application/json 142 | 143 | { 144 | "question": "What is the United States currency?" 145 | } 146 | ``` 147 | 148 | We recommend the `REST Client` extension for Visual Studio Code. With it, you can execute HTTP requests directly from your editor. 149 | 150 | Now that we have the code ready, let's test the `chat` function locally. Run the following command: 151 | 152 | ```bash 153 | npm run start 154 | ``` 155 | 156 | You should see the following message in the terminal: 157 | 158 | ```bash 159 | Worker process started and initialized. 160 | 161 | Functions: 162 | 163 | chat: [POST] http://localhost:7071/api/chat 164 | ``` 165 | 166 | Now open the `api.http` file and click the `Send Request` button that will appear in the upper right corner of the file. You should see the response from the Azure OpenAI model. 167 | 168 | If the response appears as shown in the image below, congratulations! We have just created our first function using Azure OpenAI, with LangChain and Azure Functions. 169 | 170 | ![Response of the POST request to the chat API](images/post-request-chat.png) 171 | 172 | Maybe you're wondering: "Why is `promptResponse` returning an array of numbers?" This happens because `embedQuery` does not generate text: it converts the input into an embedding. In NLP (Natural Language Processing), these numbers are the vector representation of the input text. 173 | 174 | A vector representation (or embedding) maps a piece of text to a list of numbers, arranged so that texts with similar meanings end up close to each other in that numeric space. This is what makes semantic search possible: we can compare a question and a document by comparing their vectors instead of their words. 175 | 176 | ## Refactoring the `chat` function 177 | 178 | Now that we have the `chat` function working, let's refactor the code to make it clearer and easier to maintain. You may have noticed that the code hard-codes many status code responses, which ends up making it quite repetitive!
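Before we write the refactoring helpers, here is a short aside to make the vector idea concrete. A quick way to build intuition is to compare two embeddings with cosine similarity: semantically similar texts produce vectors that point in similar directions. The snippet below is a minimal sketch, not part of the project code; it reuses the same `AzureOpenAIEmbeddings` class and `.env` setup shown above:

```typescript
import { AzureOpenAIEmbeddings } from '@langchain/azure-openai';
import 'dotenv/config';

// Cosine similarity: ~1 means the vectors point the same way, ~0 means unrelated.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

async function main() {
  const embeddings = new AzureOpenAIEmbeddings();

  const [a, b, c] = await Promise.all([
    embeddings.embedQuery('What is the United States currency?'),
    embeddings.embedQuery('Which currency is used in the USA?'),
    embeddings.embedQuery('How do I bake sourdough bread?'),
  ]);

  console.log('similar questions :', cosineSimilarity(a, b)); // expected: close to 1
  console.log('unrelated question:', cosineSimilarity(a, c)); // expected: noticeably lower
}

main().catch(console.error);
```

With that intuition in place, let's get back to the refactoring.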
179 | 180 | Inside the `src` folder, create a folder called `utils` and inside it create a file called `http-helper.ts` with the following block of code: 181 | 182 | - `packages/api/src/utils/http-helper.ts` 183 | 184 | ```typescript 185 | import { HttpResponseInit } from '@azure/functions'; 186 | 187 | export function badRequest(error: Error): HttpResponseInit { 188 | return { 189 | status: 400, 190 | jsonBody: { 191 | error: error.message, 192 | }, 193 | }; 194 | } 195 | 196 | export function notFound(error: Error): HttpResponseInit { 197 | return { 198 | status: 404, 199 | jsonBody: { 200 | error: error.message, 201 | }, 202 | }; 203 | } 204 | 205 | export function serviceUnavailable(error: Error): HttpResponseInit { 206 | return { 207 | status: 503, 208 | jsonBody: { 209 | error: error.message, 210 | }, 211 | }; 212 | } 213 | 214 | export function internalServerError(error: Error): HttpResponseInit { 215 | return { 216 | status: 500, 217 | jsonBody: { 218 | error: error.message, 219 | }, 220 | }; 221 | } 222 | 223 | export function unauthorized(error: Error): HttpResponseInit { 224 | return { 225 | status: 401, 226 | jsonBody: { 227 | error: error.message, 228 | }, 229 | }; 230 | } 231 | 232 | export function noContent(): HttpResponseInit { 233 | return { 234 | status: 204, 235 | }; 236 | } 237 | 238 | export function created(body: Record<string, unknown>): HttpResponseInit { 239 | return { 240 | status: 201, 241 | jsonBody: body, 242 | }; 243 | } 244 | 245 | export function ok(body: Record<string, unknown>): HttpResponseInit { 246 | return { 247 | status: 200, 248 | jsonBody: body, 249 | }; 250 | } 251 | ``` 252 | 253 | Note that we abstracted the logic of returning each status code into separate functions. This will help us keep the code cleaner and easier to maintain. 254 | 255 | Inside that same `utils` folder, now create a file called `index.ts` with the following block of code: 256 | 257 | - `packages/api/src/utils/index.ts` 258 | 259 | ```typescript 260 | export * from './http-helper'; 261 | ``` 262 | 263 | Done! Now that we have the status code return functions abstracted, let's refactor the `chat.ts` function to use these functions. 264 | 265 | Open the file `chat.ts` and replace the code block with: 266 | 267 | - `packages/api/src/functions/chat.ts` 268 | 269 | ```typescript 270 | import { HttpRequest, InvocationContext, HttpResponseInit } from '@azure/functions'; 271 | import { AzureOpenAIEmbeddings } from '@langchain/azure-openai'; 272 | import 'dotenv/config'; 273 | import { badRequest, serviceUnavailable, ok } from '../utils'; 274 | 275 | export async function chat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> { 276 | context.log(`Http function processed request for url "${request.url}"`); 277 | 278 | try { 279 | const requestBody: any = await request.json(); 280 | 281 | if (!requestBody?.question) { 282 | return badRequest(new Error('No question provided')); 283 | } 284 | 285 | const { question } = requestBody; 286 | 287 | const embeddings = new AzureOpenAIEmbeddings(); 288 | 289 | const prompt = `Question: ${question}`; 290 | context.log(`Sending prompt to the model: ${prompt}`); 291 | 292 | const promptResponse = await embeddings.embedQuery(prompt); 293 | 294 | return promptResponse 295 | ? ok({ promptResponse }) 296 | : serviceUnavailable(new Error('Service temporarily unavailable.
Please try again later.')); 297 | } catch (error: unknown) { 298 | const error_ = error as Error; 299 | context.error(`Error when processing chat request: ${error_.message}`); 300 | 301 | return serviceUnavailable(new Error('Service temporarily unavailable. Please try again later.')); 302 | } 303 | } 304 | ``` 305 | 306 | Now that we've refactored the `chat` function, let's test it again locally. Run the command: 307 | 308 | ```bash 309 | npm run start 310 | ``` 311 | 312 | Open the `api.http` file and click the `Send Request` button. You should see the response from the Azure OpenAI model. If everything went well, congratulations! We've refactored the `chat` function and made it cleaner and easier to maintain. 🎉 313 | 314 | At this point, let's stop implementing the `chat` function; we'll come back to it later. 315 | 316 | In the next step, we will start using the `CosmosDB LC Vector Store` to store the vectors generated by Azure OpenAI. 317 | 318 | ▶ **[Next Step: Init `CosmosDB LC Vector Store` in the project](./02-session.md)** 319 | -------------------------------------------------------------------------------- /docs/old-tutorial/05-session.md: -------------------------------------------------------------------------------- 1 | # Generate a stream response in the `chat` API 2 | 3 | In this session, we will learn how to generate a stream response in the `chat` API, using LangChain.js and the new streaming feature available in v4 of the Azure Functions programming model. 4 | 5 | ## What is streaming? 6 | 7 | Streaming is crucial for Large Language Models (LLMs) for several reasons: 8 | 9 | - **It manages memory resources efficiently**: allowing models to process long texts without overloading memory. 10 | - **It improves scalability**: making it easier to process inputs of virtually unlimited size. 11 | - **Reduces latency in real-time interactions**: providing faster responses in virtual assistants and dialog systems. 12 | - **Facilitates training and inference** on large data sets, making the use of LLMs more practical and efficient. 13 | - **It can improve the quality of the text generated**: helping models to focus on smaller pieces of text for greater cohesion and contextual relevance. 14 | - **Supports distributed workflows**: allowing models to be scaled to meet intense processing demands. 15 | 16 | As such, certain large language models (LLMs) have the ability to send responses sequentially. This means that you don't need to wait for the full response to be received before you can start working with it. This feature is especially advantageous if you want to show the response to the user as it is produced, or if you need to analyze and use the response while it is being formed. 17 | 18 | LangChain.js supports streaming through the `.stream()` method. If you want to know more about it, you can check the **[official LangChain.js documentation](https://js.langchain.com/docs/use_cases/question_answering/streaming#chain-with-sources)**. 19 | 20 | ## Support for HTTP Streams in Azure Functions 21 | 22 | The Azure Functions product team recently announced the availability of support for HTTP Streams in version 4 of the Azure Functions programming model. With this, it is now possible to return stream responses in HTTP APIs, which is especially useful for real-time data streaming scenarios.
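As a quick preview of the LangChain.js side, this is the basic consumption pattern for any `.stream()` call. It is a minimal sketch using a chat model directly, under the same `.env` configuration as the rest of the tutorial; the retrieval chain we build below follows the same pattern:

```typescript
import { AzureChatOpenAI } from '@langchain/azure-openai';
import 'dotenv/config';

async function main() {
  const model = new AzureChatOpenAI();

  // .stream() returns an async iterable: chunks arrive as they are generated
  const stream = await model.stream('Explain streaming in one sentence.');

  for await (const chunk of stream) {
    // For plain text models, each chunk's content is a string fragment
    process.stdout.write(chunk.content as string);
  }
}

main().catch(console.error);
```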
23 | 24 | To find out more about streaming support in Azure Functions v4, you can visit Microsoft's Tech Community blog by clicking **[here](https://techcommunity.microsoft.com/t5/apps-on-azure-blog/azure-functions-support-for-http-streams-in-node-js-is-now-in/ba-p/4066575)**. 25 | 26 | ## Enabling HTTP Streams support in Azure Functions 27 | 28 | Now that we understand why streaming matters in a chat application and how useful it can be, let's learn how to introduce it into the `chat` API. 29 | 30 | The first thing we need to do is enable streaming support, the new Azure Functions feature. To do this, open the file `chat.ts` and include the following code: 31 | 32 | - `api/functions/chat.ts` 33 | 34 | ```typescript 35 | (... previous code ...) 36 | 37 | app.setup({ enableHttpStream: true }); 38 | app.post('chat', { 39 | route: 'chat', 40 | authLevel: 'anonymous', 41 | handler: chat, 42 | }); 43 | ``` 44 | 45 | So the `chat.ts` file will look like this: 46 | 47 | - `api/functions/chat.ts` 48 | 49 | ```typescript 50 | import { Readable } from 'node:stream'; 51 | import { Document } from '@langchain/core/documents'; 52 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions'; 53 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai'; 54 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 55 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents'; 56 | import { AzureCosmosDBVectorStore } from '@langchain/community/vectorstores/azure_cosmosdb'; 57 | import { createRetrievalChain } from 'langchain/chains/retrieval'; 58 | import 'dotenv/config'; 59 | import { badRequest, serviceUnavailable } from '../utils'; 60 | 61 | export async function chat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> { 62 | context.log(`Http function processed request for url "${request.url}"`); 63 | 64 | try { 65 | const requestBody: any = await request.json(); 66 | 67 | if (!requestBody?.question) { 68 | return badRequest(new Error('No question provided')); 69 | } 70 | 71 | const { question } = requestBody; 72 | 73 | const embeddings = new AzureOpenAIEmbeddings(); 74 | 75 | const prompt = `Question: ${question}`; 76 | context.log(`Sending prompt to the model: ${prompt}`); 77 | 78 | const model = new AzureChatOpenAI(); 79 | 80 | const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([ 81 | ['system', "Answer the user's questions based on the below context:\n\n{context}"], 82 | ['human', '{input}'], 83 | ]); 84 | 85 | const combineDocsChain = await createStuffDocumentsChain({ 86 | llm: model, 87 | prompt: questionAnsweringPrompt, 88 | }); 89 | 90 | const store = new AzureCosmosDBVectorStore(embeddings, {}); 91 | 92 | const chain = await createRetrievalChain({ 93 | retriever: store.asRetriever(), 94 | combineDocsChain, 95 | }); 96 | 97 | const response = await chain.stream({ 98 | input: question, 99 | }); 100 | 101 | return { 102 | headers: { 'Content-Type': 'text/plain' }, 103 | body: createStream(response), 104 | }; 105 | } catch (error: unknown) { 106 | const error_ = error as Error; 107 | context.error(`Error when processing chat request: ${error_.message}`); 108 | 109 | return serviceUnavailable(new Error('Service temporarily unavailable.
Please try again later.')); 110 | } 111 | } 112 | 113 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) { 114 | const buffer = new Readable({ 115 | read() {}, 116 | }); 117 | 118 | const stream = async () => { 119 | for await (const chunk of chunks) { 120 | buffer.push(chunk.answer); 121 | } 122 | 123 | buffer.push(null); 124 | }; 125 | 126 | stream(); 127 | 128 | return buffer; 129 | } 130 | 131 | app.setup({ enableHttpStream: true }); 132 | app.post('chat', { 133 | route: 'chat', 134 | authLevel: 'anonymous', 135 | handler: chat, 136 | }); 137 | ``` 138 | 139 | And that's it! Azure Functions is now enabled to support streaming. 140 | 141 | ## Generating a stream response in the `chat` API 142 | 143 | Now, let's move on and create the logic to generate a stream response in the `chat` API. 144 | 145 | Open the `chat.ts` file and let's make some significant changes: 146 | 147 | - `chat.ts` 148 | 149 | ```typescript 150 | import { Readable } from 'node:stream'; 151 | import { Document } from '@langchain/core/documents'; 152 | import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions'; 153 | import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai'; 154 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 155 | import { createStuffDocumentsChain } from 'langchain/chains/combine_documents'; 156 | import { AzureCosmosDBVectorStore } from '@langchain/community/vectorstores/azure_cosmosdb'; 157 | import { createRetrievalChain } from 'langchain/chains/retrieval'; 158 | import 'dotenv/config'; 159 | import { badRequest, serviceUnavailable } from '../utils'; 160 | 161 | export async function chat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> { 162 | try { 163 | const requestBody: any = await request.json(); 164 | 165 | if (!requestBody?.question) { 166 | return badRequest(new Error('No question provided')); 167 | } 168 | 169 | const { question } = requestBody; 170 | 171 | const embeddings = new AzureOpenAIEmbeddings(); 172 | 173 | const prompt = `Question: ${question}`; 174 | context.log(`Sending prompt to the model: ${prompt}`); 175 | 176 | const model = new AzureChatOpenAI(); 177 | 178 | const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([ 179 | ['system', "Answer the user's questions based on the below context:\n\n{context}"], 180 | ['human', '{input}'], 181 | ]); 182 | 183 | const combineDocsChain = await createStuffDocumentsChain({ 184 | llm: model, 185 | prompt: questionAnsweringPrompt, 186 | }); 187 | 188 | const store = new AzureCosmosDBVectorStore(embeddings, {}); 189 | 190 | const chain = await createRetrievalChain({ 191 | retriever: store.asRetriever(), 192 | combineDocsChain, 193 | }); 194 | 195 | const response = await chain.stream({ 196 | input: question, 197 | }); 198 | 199 | return { 200 | body: createStream(response), 201 | headers: { 202 | 'Content-Type': 'text/plain', 203 | }, 204 | }; 205 | } catch (error: unknown) { 206 | const error_ = error as Error; 207 | context.error(`Error when processing chat request: ${error_.message}`); 208 | 209 | return serviceUnavailable(new Error('Service temporarily unavailable.
Please try again later.')); 210 | } 211 | } 212 | 213 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) { 214 | const buffer = new Readable({ 215 | read() {}, 216 | }); 217 | 218 | const stream = async () => { 219 | for await (const chunk of chunks) { 220 | buffer.push(chunk.answer); 221 | } 222 | 223 | buffer.push(null); 224 | }; 225 | 226 | stream(); 227 | 228 | return buffer; 229 | } 230 | 231 | app.setup({ enableHttpStream: true }); 232 | app.post('chat', { 233 | route: 'chat', 234 | authLevel: 'anonymous', 235 | handler: chat, 236 | }); 237 | ``` 238 | 239 | Several changes here, right? Let's understand what has been changed and included: 240 | 241 | ```typescript 242 | const response = await chain.stream({ 243 | input: question, 244 | }); 245 | ``` 246 | 247 | Before, the `chain` variable was using the `invoke()` method. However, as we now want to generate a stream response, we are using the `stream()` method, passing the `input` parameter with the question the user asked. 248 | 249 | After that, we return the stream response, using the `createStream()` function. 250 | 251 | ```typescript 252 | function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) { 253 | const buffer = new Readable({ 254 | read() {}, 255 | }); 256 | 257 | const stream = async () => { 258 | for await (const chunk of chunks) { 259 | buffer.push(chunk.answer); 260 | } 261 | 262 | buffer.push(null); 263 | }; 264 | 265 | stream(); 266 | 267 | return buffer; 268 | } 269 | 270 | app.setup({ enableHttpStream: true }); 271 | app.post('chat', { 272 | route: 'chat', 273 | authLevel: 'anonymous', 274 | handler: chat, 275 | }); 276 | ``` 277 | 278 | The `createStream()` function is responsible for generating the stream response. It receives an `AsyncIterable` of `{ context: Document[]; answer: string }` as a parameter and creates a `Readable` stream, which is an interface for reading data from a stream. 279 | 280 | Note that we are importing: 281 | 282 | - `Document` from the `@langchain/core/documents` package: an interface for interacting with a document. 283 | - `Readable` from the `node:stream` package: a class that belongs to the `stream` module of Node.js and provides an interface for reading data from a stream. 284 | 285 | ```typescript 286 | return { 287 | headers: { 'Content-Type': 'text/plain' }, 288 | body: createStream(response), 289 | }; 290 | ``` 291 | 292 | Finally, we return the stream response using the `createStream()` function and set the `Content-Type` header to `text/plain`. 293 | 294 | And that's it! Now the `chat` API is ready to generate stream responses. 295 | 296 | Let's test the `chat` API and see how it behaves when generating a stream response. To do this, open the terminal again in the `api` folder and run the command: 297 | 298 | ```bash 299 | npm run start 300 | ``` 301 | 302 | Then open the `api.http` file and send the `chat` API request. You can watch the response streaming in, as shown in the gif below: 303 | 304 | ![chat-stream](./images/stream-response.gif) 305 | 306 | Note that when we send the request, the `Response` header shows `Transfer-Encoding: chunked`, which indicates that the response is being sent in chunks. The response is displayed incrementally: as it is generated, it is shown. 307 | 308 | ![chat-stream-response](./images/stream-response.png) 309 | 310 | And that's it!
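If you prefer testing from a plain terminal instead of the REST Client extension, you can also watch the chunks arrive with `curl`. The `-N` flag disables output buffering; this sketch assumes the function is running locally on the default port, with the same payload as in `api.http`:

```bash
curl -N http://localhost:7071/api/chat \
  -H "Content-Type: application/json" \
  -d '{"question": "What is the United States currency?"}'
```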
You've now learned how to generate a stream response in the `chat` API using LangChain.js and the new stream feature that is also available for v4 of the Azure Functions programming model. 311 | -------------------------------------------------------------------------------- /infra/main.bicep: -------------------------------------------------------------------------------- 1 | targetScope = 'subscription' 2 | 3 | @minLength(1) 4 | @maxLength(64) 5 | @description('Name of the environment which is used to generate a short unique hash used in all resources.') 6 | param environmentName string 7 | 8 | @minLength(1) 9 | @description('Primary location for all resources') 10 | param location string 11 | 12 | param resourceGroupName string = '' 13 | param webappName string = 'webapp' 14 | param apiServiceName string = 'api' 15 | param appServicePlanName string = '' 16 | param storageAccountName string = '' 17 | param cosmosDbServiceName string = '' 18 | 19 | @description('Location for the OpenAI resource group') 20 | @allowed(['australiaeast', 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'japaneast', 'northcentralus', 'swedencentral', 'switzerlandnorth', 'uksouth', 'westeurope']) 21 | @metadata({ 22 | azd: { 23 | type: 'location' 24 | } 25 | }) 26 | param openAiLocation string // Set in main.parameters.json 27 | param openAiSkuName string = 'S0' 28 | param openAiUrl string = '' 29 | param openAiApiVersion string // Set in main.parameters.json 30 | 31 | // Location is not relevant here as it's only for the built-in api 32 | // which is not used here. Static Web App is a global service otherwise 33 | @description('Location for the Static Web App') 34 | @allowed(['westus2', 'centralus', 'eastus2', 'westeurope', 'eastasia', 'eastasiastage']) 35 | @metadata({ 36 | azd: { 37 | type: 'location' 38 | } 39 | }) 40 | param webappLocation string // Set in main.parameters.json 41 | 42 | param chatModelName string // Set in main.parameters.json 43 | param chatDeploymentName string = chatModelName 44 | param chatModelVersion string // Set in main.parameters.json 45 | param chatDeploymentCapacity int = 15 46 | param embeddingsModelName string // Set in main.parameters.json 47 | param embeddingsModelVersion string // Set in main.parameters.json 48 | param embeddingsDeploymentName string = embeddingsModelName 49 | param embeddingsDeploymentCapacity int = 30 50 | 51 | param blobContainerName string = 'files' 52 | 53 | // Id of the user or app to assign application roles 54 | param principalId string = '' 55 | 56 | // Enable enhanced security with VNet integration 57 | param useVnet bool // Set in main.parameters.json 58 | 59 | // Differentiates between automated and manual deployments 60 | param isContinuousDeployment bool // Set in main.parameters.json 61 | 62 | var abbrs = loadJsonContent('abbreviations.json') 63 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) 64 | var tags = { 'azd-env-name': environmentName } 65 | var finalOpenAiUrl = empty(openAiUrl) ? 'https://${openAi.outputs.name}.openai.azure.com' : openAiUrl 66 | var storageUrl = 'https://${storage.outputs.name}.blob.${environment().suffixes.storage}' 67 | var apiResourceName = '${abbrs.webSitesFunctions}api-${resourceToken}' 68 | 69 | // Organize resources in a resource group 70 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { 71 | name: !empty(resourceGroupName) ?
resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' 72 | location: location 73 | tags: tags 74 | } 75 | 76 | // The application webapp 77 | module webapp './core/host/staticwebapp.bicep' = { 78 | name: 'webapp' 79 | scope: resourceGroup 80 | params: { 81 | name: !empty(webappName) ? webappName : '${abbrs.webStaticSites}web-${resourceToken}' 82 | location: webappLocation 83 | tags: union(tags, { 'azd-service-name': webappName }) 84 | sku: useVnet ? { 85 | name: 'Standard' 86 | tier: 'Standard' 87 | } : { 88 | name: 'Free' 89 | tier: 'Free' 90 | } 91 | } 92 | } 93 | 94 | // The application backend API 95 | module api './app/api.bicep' = { 96 | name: 'api' 97 | scope: resourceGroup 98 | params: { 99 | name: apiResourceName 100 | location: location 101 | tags: union(tags, { 'azd-service-name': apiServiceName }) 102 | appServicePlanId: appServicePlan.outputs.id 103 | allowedOrigins: [webapp.outputs.uri] 104 | storageAccountName: storage.outputs.name 105 | applicationInsightsName: monitoring.outputs.applicationInsightsName 106 | virtualNetworkSubnetId: useVnet ? vnet.outputs.appSubnetID : '' 107 | staticWebAppName: webapp.outputs.name 108 | appSettings: { 109 | APPINSIGHTS_INSTRUMENTATIONKEY: monitoring.outputs.applicationInsightsInstrumentationKey 110 | AZURE_OPENAI_API_INSTANCE_NAME: openAi.outputs.name 111 | AZURE_OPENAI_API_ENDPOINT: finalOpenAiUrl 112 | AZURE_OPENAI_API_VERSION: openAiApiVersion 113 | AZURE_OPENAI_API_DEPLOYMENT_NAME: chatDeploymentName 114 | AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME: embeddingsDeploymentName 115 | AZURE_COSMOSDB_NOSQL_ENDPOINT: cosmosDb.outputs.endpoint 116 | AZURE_STORAGE_URL: storageUrl 117 | AZURE_STORAGE_CONTAINER_NAME: blobContainerName 118 | } 119 | } 120 | dependsOn: empty(openAiUrl) ? [] : [openAi] 121 | } 122 | 123 | // Compute plan for the Azure Functions API 124 | module appServicePlan './core/host/appserviceplan.bicep' = { 125 | name: 'appserviceplan' 126 | scope: resourceGroup 127 | params: { 128 | name: !empty(appServicePlanName) ? appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' 129 | location: location 130 | tags: tags 131 | sku: useVnet ? { 132 | name: 'FC1' 133 | tier: 'FlexConsumption' 134 | } : { 135 | name: 'Y1' 136 | tier: 'Dynamic' 137 | } 138 | reserved: useVnet ? true : null 139 | } 140 | } 141 | 142 | // Storage for Azure Functions API and Blob storage 143 | module storage './core/storage/storage-account.bicep' = { 144 | name: 'storage' 145 | scope: resourceGroup 146 | params: { 147 | name: !empty(storageAccountName) ? storageAccountName : '${abbrs.storageStorageAccounts}${resourceToken}' 148 | location: location 149 | tags: tags 150 | allowBlobPublicAccess: false 151 | allowSharedKeyAccess: !useVnet 152 | containers: concat([ 153 | { 154 | name: blobContainerName 155 | publicAccess: 'None' 156 | } 157 | ], useVnet ? [ 158 | // Deployment storage container 159 | { 160 | name: apiResourceName 161 | } 162 | ] : []) 163 | networkAcls: useVnet ? 
{ 164 | defaultAction: 'Deny' 165 | bypass: 'AzureServices' 166 | virtualNetworkRules: [ 167 | { 168 | id: vnet.outputs.appSubnetID 169 | action: 'Allow' 170 | } 171 | ] 172 | } : { 173 | bypass: 'AzureServices' 174 | defaultAction: 'Allow' 175 | } 176 | } 177 | } 178 | 179 | // Virtual network for Azure Functions API 180 | module vnet './app/vnet.bicep' = if (useVnet) { 181 | name: 'vnet' 182 | scope: resourceGroup 183 | params: { 184 | name: '${abbrs.networkVirtualNetworks}${resourceToken}' 185 | location: location 186 | tags: tags 187 | } 188 | } 189 | 190 | // Monitor application with Azure Monitor 191 | module monitoring './core/monitor/monitoring.bicep' = { 192 | name: 'monitoring' 193 | scope: resourceGroup 194 | params: { 195 | location: location 196 | tags: tags 197 | logAnalyticsName: '${abbrs.operationalInsightsWorkspaces}${resourceToken}' 198 | applicationInsightsName: '${abbrs.insightsComponents}${resourceToken}' 199 | applicationInsightsDashboardName: '${abbrs.portalDashboards}${resourceToken}' 200 | } 201 | } 202 | 203 | module openAi 'core/ai/cognitiveservices.bicep' = if (empty(openAiUrl)) { 204 | name: 'openai' 205 | scope: resourceGroup 206 | params: { 207 | name: '${abbrs.cognitiveServicesAccounts}${resourceToken}' 208 | location: openAiLocation 209 | tags: tags 210 | sku: { 211 | name: openAiSkuName 212 | } 213 | disableLocalAuth: true 214 | deployments: [ 215 | { 216 | name: chatDeploymentName 217 | model: { 218 | format: 'OpenAI' 219 | name: chatModelName 220 | version: chatModelVersion 221 | } 222 | sku: { 223 | name: 'GlobalStandard' 224 | capacity: chatDeploymentCapacity 225 | } 226 | } 227 | { 228 | name: embeddingsDeploymentName 229 | model: { 230 | format: 'OpenAI' 231 | name: embeddingsModelName 232 | version: embeddingsModelVersion 233 | } 234 | capacity: embeddingsDeploymentCapacity 235 | } 236 | ] 237 | } 238 | } 239 | 240 | module cosmosDb 'br/public:avm/res/document-db/database-account:0.9.0' = { 241 | name: 'cosmosDb' 242 | scope: resourceGroup 243 | params: { 244 | name: !empty(cosmosDbServiceName) ? 
cosmosDbServiceName : '${abbrs.documentDBDatabaseAccounts}${resourceToken}' 245 | tags: tags 246 | locations: [ 247 | { 248 | locationName: location 249 | failoverPriority: 0 250 | isZoneRedundant: false 251 | } 252 | ] 253 | managedIdentities: { 254 | systemAssigned: true 255 | } 256 | capabilitiesToAdd: [ 257 | 'EnableServerless' 258 | 'EnableNoSQLVectorSearch' 259 | ] 260 | networkRestrictions: { 261 | ipRules: [] 262 | virtualNetworkRules: [] 263 | publicNetworkAccess: 'Enabled' 264 | } 265 | sqlDatabases: [ 266 | { 267 | containers: [ 268 | { 269 | name: 'vectorSearchContainer' 270 | paths: [ 271 | '/id' 272 | ] 273 | } 274 | ] 275 | name: 'vectorSearchDB' 276 | } 277 | { 278 | containers: [ 279 | { 280 | name: 'chatHistoryContainer' 281 | paths: [ 282 | '/userId' 283 | ] 284 | } 285 | ] 286 | name: 'chatHistoryDB' 287 | } 288 | ] 289 | } 290 | } 291 | 292 | module dbRoleDefinition './core/database/cosmos/sql/cosmos-sql-role-def.bicep' = { 293 | scope: resourceGroup 294 | name: 'db-contrib-role-definition' 295 | params: { 296 | accountName: cosmosDb.outputs.name 297 | } 298 | } 299 | 300 | 301 | // Managed identity role assignments 302 | // --------------------------------------------------------------------------- 303 | 304 | // User roles 305 | module openAiRoleUser 'core/security/role.bicep' = if (!isContinuousDeployment) { 306 | scope: resourceGroup 307 | name: 'openai-role-user' 308 | params: { 309 | principalId: principalId 310 | // Cognitive Services OpenAI User 311 | roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' 312 | principalType: 'User' 313 | } 314 | } 315 | 316 | module storageRoleUser 'core/security/role.bicep' = if (!isContinuousDeployment) { 317 | scope: resourceGroup 318 | name: 'storage-contrib-role-user' 319 | params: { 320 | principalId: principalId 321 | // Storage Blob Data Contributor 322 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' 323 | principalType: 'User' 324 | } 325 | } 326 | 327 | module dbContribRoleUser './core/database/cosmos/sql/cosmos-sql-role-assign.bicep' = if (!isContinuousDeployment) { 328 | scope: resourceGroup 329 | name: 'db-contrib-role-user' 330 | params: { 331 | accountName: cosmosDb.outputs.name 332 | principalId: principalId 333 | // Cosmos DB Data Contributor 334 | roleDefinitionId: dbRoleDefinition.outputs.id 335 | } 336 | } 337 | 338 | // System roles 339 | module openAiRoleApi 'core/security/role.bicep' = { 340 | scope: resourceGroup 341 | name: 'openai-role-api' 342 | params: { 343 | principalId: api.outputs.identityPrincipalId 344 | // Cognitive Services OpenAI User 345 | roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' 346 | principalType: 'ServicePrincipal' 347 | } 348 | } 349 | 350 | module storageRoleApi 'core/security/role.bicep' = { 351 | scope: resourceGroup 352 | name: 'storage-role-api' 353 | params: { 354 | principalId: api.outputs.identityPrincipalId 355 | // Storage Blob Data Contributor 356 | roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' 357 | principalType: 'ServicePrincipal' 358 | } 359 | } 360 | 361 | module dbContribRoleApi './core/database/cosmos/sql/cosmos-sql-role-assign.bicep' = { 362 | scope: resourceGroup 363 | name: 'db-contrib-role-api' 364 | params: { 365 | accountName: cosmosDb.outputs.name 366 | principalId: api.outputs.identityPrincipalId 367 | // Cosmos DB Data Contributor 368 | roleDefinitionId: dbRoleDefinition.outputs.id 369 | } 370 | } 371 | 372 | output AZURE_LOCATION string = location 373 | output AZURE_TENANT_ID string = tenant().tenantId 374 | output
AZURE_RESOURCE_GROUP string = resourceGroup.name 375 | 376 | output AZURE_OPENAI_API_ENDPOINT string = finalOpenAiUrl 377 | output AZURE_OPENAI_API_INSTANCE_NAME string = openAi.outputs.name 378 | output AZURE_OPENAI_API_VERSION string = openAiApiVersion 379 | output AZURE_OPENAI_API_DEPLOYMENT_NAME string = chatDeploymentName 380 | output AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME string = embeddingsDeploymentName 381 | output AZURE_STORAGE_URL string = storageUrl 382 | output AZURE_STORAGE_CONTAINER_NAME string = blobContainerName 383 | output AZURE_COSMOSDB_NOSQL_ENDPOINT string = cosmosDb.outputs.endpoint 384 | 385 | output API_URL string = useVnet ? '' : api.outputs.uri 386 | output WEBAPP_URL string = webapp.outputs.uri 387 | output UPLOAD_URL string = useVnet ? webapp.outputs.uri : api.outputs.uri 388 | --------------------------------------------------------------------------------