├── .devcontainer
    └── devcontainer.json
├── .editorconfig
├── .eslintrc.json
├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── azure.yaml
├── data
    ├── privacy-policy.pdf
    ├── support.pdf
    └── terms-of-service.pdf
├── infra
    ├── abbreviations.json
    ├── core
    │   ├── ai
    │   │   └── cognitiveservices.bicep
    │   ├── host
    │   │   ├── container-app.bicep
    │   │   ├── container-apps-environment.bicep
    │   │   ├── container-apps.bicep
    │   │   ├── container-registry.bicep
    │   │   └── staticwebapp.bicep
    │   ├── monitor
    │   │   ├── applicationinsights.bicep
    │   │   ├── loganalytics.bicep
    │   │   └── monitoring.bicep
    │   ├── search
    │   │   └── search-services.bicep
    │   └── security
    │   │   └── role.bicep
    ├── main.bicep
    └── main.parameters.json
├── package.json
├── scripts
    ├── index-data.ps1
    └── index-data.sh
└── src
    ├── backend
        ├── README.md
        ├── package.json
        ├── src
        │   ├── app.ts
        │   ├── lib
        │   │   ├── index.ts
        │   │   ├── message-builder.ts
        │   │   └── models.ts
        │   ├── plugins
        │   │   ├── README.md
        │   │   ├── chat.ts
        │   │   ├── config.ts
        │   │   └── sensible.ts
        │   └── routes
        │   │   ├── README.md
        │   │   └── root.ts
        ├── test.http
        └── tsconfig.json
    ├── frontend
        ├── .lintstagedrc
        ├── README.md
        ├── assets
        │   ├── lightbulb.svg
        │   ├── new-chat.svg
        │   ├── question.svg
        │   └── send.svg
        ├── index.html
        ├── package.json
        ├── public
        │   └── favicon.ico
        ├── src
        │   ├── api.ts
        │   ├── components
        │   │   ├── chat.ts
        │   │   └── debug.ts
        │   ├── index.ts
        │   ├── message-parser.ts
        │   ├── models.ts
        │   └── vite-env.d.ts
        ├── tsconfig.json
        └── vite.config.ts
    └── indexer
        ├── Dockerfile
        ├── README.md
        ├── bin
            └── index-files.js
        ├── package.json
        ├── src
            ├── app.ts
            ├── lib
            │   ├── cli.ts
            │   ├── document-processor.ts
            │   ├── index.ts
            │   ├── indexer.ts
            │   └── model-limits.ts
            ├── plugins
            │   ├── README.md
            │   ├── azure.ts
            │   ├── config.ts
            │   ├── indexer.ts
            │   ├── multipart.ts
            │   ├── openai.ts
            │   └── sensible.ts
            └── routes
            │   ├── README.md
            │   ├── indexes
            │       └── index.ts
            │   └── root.ts
        ├── test.http
        └── tsconfig.json


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node
 3 | {
 4 |   "name": "OpenAI Workshop",
 5 | 
 6 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 7 |   "image": "mcr.microsoft.com/devcontainers/javascript-node:18-bullseye",
 8 | 
 9 |   // Features to add to the dev container. More info: https://containers.dev/features.
10 |   "features": {
11 |     "ghcr.io/devcontainers/features/node:1": {
12 |       "version": "18"
13 |     },
14 |     "ghcr.io/devcontainers/features/docker-in-docker:1": {
15 |       "version": 20,
16 |       "moby": "false"
17 |     },
18 |     "ghcr.io/devcontainers/features/powershell:1": {},
19 |     "ghcr.io/devcontainers/features/azure-cli:1": {
20 |       "version": "latest",
21 |       "installBicep": true
22 |     },
23 |     "ghcr.io/azure/azure-dev/azd:latest": {}
24 |   },
25 | 
26 |   // Configure tool-specific properties.
27 |   "customizations": {
28 |     "vscode": {
29 |       "extensions": [
30 |         "ms-azuretools.azure-dev",
31 |         "ms-azuretools.vscode-bicep",
32 |         "ms-azuretools.vscode-docker",
33 |         "esbenp.prettier-vscode",
34 |         "humao.rest-client",
35 |         "runem.lit-plugin"
36 |       ]
37 |     }
38 |   },
39 | 
40 |   // Use 'forwardPorts' to make a list of ports inside the container available locally.
41 |   "forwardPorts": [3000, 3001, 8000],
42 | 
43 |   // Use 'postCreateCommand' to run commands after the container is created.
44 |   "postCreateCommand": "npm install && npm install -g @moaw/cli fuzz-run",
45 | 
46 |   // Set minimal host requirements for the container.
47 |   "hostRequirements": {
48 |     "memory": "8gb"
49 |   }
50 | 
51 |   // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
52 |   // "remoteUser": "root"
53 | }
54 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # Editor configuration, see http://editorconfig.org
 2 | root = true
 3 | 
 4 | [*]
 5 | charset = utf-8
 6 | end_of_line = lf
 7 | indent_style = space
 8 | indent_size = 2
 9 | insert_final_newline = true
10 | trim_trailing_whitespace = true
11 | 
12 | [*.md]
13 | max_line_length = off
14 | trim_trailing_whitespace = false
15 | 


--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "parserOptions": {
 3 |     "ecmaVersion": "latest",
 4 |     "sourceType": "module"
 5 |   },
 6 |   "env": {
 7 |     "es2021": true,
 8 |     "node": true
 9 |   },
10 |   "extends": [
11 |     "eslint:recommended",
12 |     "plugin:@typescript-eslint/recommended",
13 |     "plugin:unicorn/recommended",
14 |     "plugin:n/recommended",
15 |     "plugin:import/recommended",
16 |     "plugin:import/typescript"
17 |   ],
18 |   "parser": "@typescript-eslint/parser",
19 |   "plugins": ["@typescript-eslint"],
20 |   "ignorePatterns": ["dist", "test-dist", "coverage", "docs/slides"],
21 |   "settings": {
22 |     "import/resolver": {
23 |       "node": {
24 |         "extensions": [".js", ".ts"]
25 |       }
26 |     },
27 |     "import/parsers": {
28 |       "@typescript-eslint/parser": [".ts"]
29 |     }
30 |   },
31 |   "root": true,
32 |   "rules": {
33 |     "@typescript-eslint/no-explicit-any": "off",
34 |     "@typescript-eslint/no-unused-vars": "off",
35 |     "@typescript-eslint/consistent-type-imports": [
36 |       "error",
37 |       {
38 |         "prefer": "type-imports",
39 |         "fixStyle": "inline-type-imports"
40 |       }
41 |     ],
42 |     "n/no-extraneous-import": "off",
43 |     "n/no-missing-import": "off",
44 |     "unicorn/no-null": "off",
45 |     "unicorn/prefer-at": "off",
46 |     "unicorn/prefer-query-selector": "off",
47 |     "unicorn/prevent-abbreviations": [
48 |       "error",
49 |       {
50 |         "allowList": {
51 |           "Props": true,
52 |           "i": true
53 |         }
54 |       }
55 |     ],
56 |     "import/default": "off",
57 |     "import/no-unresolved": "off",
58 |     "import/extensions": [
59 |       "error",
60 |       "always",
61 |       {
62 |         "ignorePackages": true
63 |       }
64 |     ],
65 |     "import/namespace": "off",
66 |     "import/named": "off"
67 |   },
68 |   "overrides": [
69 |     {
70 |       "files": ["vite-env.d.ts"],
71 |       "rules": {
72 |         "unicorn/prevent-abbreviations": "off"
73 |       }
74 |     }
75 |   ]
76 | }
77 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled output
 2 | node_modules/
 3 | dist/
 4 | .tmp/
 5 | 
 6 | # Logs
 7 | logs
 8 | *.log
 9 | npm-debug.log*
10 | pnpm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | lerna-debug.log*
14 | 
15 | # Deployment
16 | *.env
17 | .azure
18 | 
19 | # OS
20 | .DS_Store
21 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Microsoft Corporation.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🤖 azure-openai-rag-workshop
 2 | 
 3 | In this workshop, we will build a chatbot that is based on OpenAI language models and implements the Retrieval Augmented Generation (RAG) pattern. You'll use [Fastify](https://fastify.dev) to create a [Node.js](https://nodejs.org/en/) service that leverages the [OpenAI SDK](https://platform.openai.com/docs/libraries/) and [LangChain](https://js.langchain.com/) to build a chatbot that will answer questions based on a corpus of documents, as well as a website to test it.
 4 | 
 5 | <!-- Finally, we will deploy everything on Azure with a CI/CD pipeline. -->
 6 | 
 7 | 👉 [See the workshop](https://aka.ms/ws/openai-rag)
 8 | 
 9 | ## Prerequisites
10 | 
11 | - **Node.js v18+**
12 | - **Docker v20+**
13 | - **Azure account**. If you're new to Azure, [get an Azure account for free](https://azure.microsoft.com/free/?WT.mc_id=javascript-0000-cxa) to get free Azure credits to get started.
14 | - **Azure subscription with access enabled for the Azure OpenAI service**. You can request access with [this form](https://aka.ms/oaiapply).
15 | 
16 | You can use [GitHub Codespaces](https://github.com/features/codespaces) to work on this project directly from your browser: select the **Code** button, then the **Codespaces** tab and click on **Create Codespaces on main**.
17 | 
18 | You can also use the [Dev Containers extension for VS Code](https://aka.ms/vscode/ext/devcontainer) to work locally using a ready-to-use dev environment.
19 | 
20 | ## Project details
21 | 
22 | This project is structured as a monorepo and makes use of [NPM Workspaces](https://docs.npmjs.com/cli/using-npm/workspaces).
23 | 
24 | ## How to run locally
25 | 
26 | ```bash
27 | npm install
28 | npm start
29 | ```
30 | 
31 | This command will start the frontend and backend services.
32 | For these services to work, you need to have a `.env` file at the root of the project with at least the following content:
33 | 
34 | ```bash
35 | AZURE_SEARCH_SERVICE=<your_azure_cognitive_search_instance_name>
36 | AZURE_OPENAI_URL=<your_openai_instance_url>
37 | ```
38 | 
39 | The application will then be available at `http://localhost:8000`.
40 | 
41 | ## How to build Docker images
42 | 
43 | ```bash
44 | npm run docker:build
45 | ```
46 | 
47 | This command will build the container images for all services.
48 | 
49 | ## How to deploy to Azure
50 | 
51 | ```bash
52 | azd auth login
53 | azd up
54 | ```
55 | 
56 | These commands will first ask you to log in to Azure. Then they will provision the Azure resources, package the services and deploy them to Azure.
57 | 
58 | ## References
59 | 
60 | This workshop is based on the enterprise-ready sample **ChatGPT + Enterprise data with Azure OpenAI and Cognitive Search**:
61 | 
62 | - [JavaScript version](https://github.com/Azure-Samples/azure-search-openai-javascript)
63 | - [Python version](https://github.com/Azure-Samples/azure-search-openai-demo/)
64 | - [Java version](https://github.com/Azure-Samples/azure-search-openai-demo-java)
65 | - [C# version](https://github.com/Azure-Samples/azure-search-openai-demo-csharp)
66 | 
67 | If you want to go further with more advanced use-cases, authentication, history and more, you should check it out!
68 | 
69 | ## Contributing
70 | 
71 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
72 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
73 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
74 | 
75 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
76 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
77 | provided by the bot. You will only need to do this once across all repos using our CLA.
78 | 
79 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
80 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
81 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
82 | 
83 | ## Trademarks
84 | 
85 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
86 | trademarks or logos is subject to and must follow
87 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
88 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
89 | Any use of third-party trademarks or logos are subject to those third-party's policies.
90 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | - Full paths of source file(s) related to the manifestation of the issue
23 | - The location of the affected source code (tag/branch/commit or direct URL)
24 | - Any special configuration required to reproduce the issue
25 | - Step-by-step instructions to reproduce the issue
26 | - Proof-of-concept or exploit code (if possible)
27 | - Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
 1 | # Support
 2 | 
 3 | ## How to file issues and get help
 4 | 
 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
 7 | feature request as a new Issue.
 8 | 
 9 | For help and questions about using this project, please use GitHub Issues and tag them with the
10 | **question** label.
11 | 
12 | ## Microsoft Support Policy
13 | 
14 | Support for this project is limited to the resources listed above.
15 | 


--------------------------------------------------------------------------------
/azure.yaml:
--------------------------------------------------------------------------------
 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
 2 | 
 3 | name: azure-openai-rag-workshop
 4 | metadata:
 5 |   template: azure-openai-rag-workshop@1.0.0
 6 | 
 7 | services:
 8 |   frontend:
 9 |     project: ./src/frontend
10 |     dist: dist
11 |     language: ts
12 |     host: staticwebapp
13 |     hooks:
14 |       predeploy:
15 |         windows:
16 |           shell: pwsh
17 |           run: Export-ModuleMember -Variable BACKEND_API_URI && npm run build
18 |           interactive: true
19 |           continueOnError: false
20 |         posix:
21 |           shell: sh
22 |           run: export BACKEND_API_URI && npm run build
23 |           interactive: true
24 |           continueOnError: false
25 | 
26 |   backend:
27 |     project: ./src/backend
28 |     language: ts
29 |     host: containerapp
30 |     docker:
31 |       context: ../..
32 | 
33 |   indexer:
34 |     project: ./src/indexer
35 |     language: ts
36 |     host: containerapp
37 |     docker:
38 |       context: ../..
39 | 
40 | hooks:
41 |   postup:
42 |     windows:
43 |       shell: pwsh
44 |       run: ./scripts/index-data.ps1
45 |       interactive: true
46 |       continueOnError: false
47 |     posix:
48 |       shell: sh
49 |       run: ./scripts/index-data.sh
50 |       interactive: true
51 |       continueOnError: false
52 | 


--------------------------------------------------------------------------------
/data/privacy-policy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-openai-rag-workshop-template/5fb3bee73fbc067489f98d0a867bea504ad99cb3/data/privacy-policy.pdf


--------------------------------------------------------------------------------
/data/support.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-openai-rag-workshop-template/5fb3bee73fbc067489f98d0a867bea504ad99cb3/data/support.pdf


--------------------------------------------------------------------------------
/data/terms-of-service.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-openai-rag-workshop-template/5fb3bee73fbc067489f98d0a867bea504ad99cb3/data/terms-of-service.pdf


--------------------------------------------------------------------------------
/infra/abbreviations.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "analysisServicesServers": "as",
  3 |   "apiManagementService": "apim-",
  4 |   "appConfigurationConfigurationStores": "appcs-",
  5 |   "appManagedEnvironments": "cae-",
  6 |   "appContainerApps": "ca-",
  7 |   "authorizationPolicyDefinitions": "policy-",
  8 |   "automationAutomationAccounts": "aa-",
  9 |   "blueprintBlueprints": "bp-",
 10 |   "blueprintBlueprintsArtifacts": "bpa-",
 11 |   "cacheRedis": "redis-",
 12 |   "cdnProfiles": "cdnp-",
 13 |   "cdnProfilesEndpoints": "cdne-",
 14 |   "cognitiveServicesAccounts": "cog-",
 15 |   "cognitiveServicesFormRecognizer": "cog-fr-",
 16 |   "cognitiveServicesTextAnalytics": "cog-ta-",
 17 |   "computeAvailabilitySets": "avail-",
 18 |   "computeCloudServices": "cld-",
 19 |   "computeDiskEncryptionSets": "des",
 20 |   "computeDisks": "disk",
 21 |   "computeDisksOs": "osdisk",
 22 |   "computeGalleries": "gal",
 23 |   "computeSnapshots": "snap-",
 24 |   "computeVirtualMachines": "vm",
 25 |   "computeVirtualMachineScaleSets": "vmss-",
 26 |   "containerInstanceContainerGroups": "ci",
 27 |   "containerRegistryRegistries": "cr",
 28 |   "containerServiceManagedClusters": "aks-",
 29 |   "databricksWorkspaces": "dbw-",
 30 |   "dataFactoryFactories": "adf-",
 31 |   "dataLakeAnalyticsAccounts": "dla",
 32 |   "dataLakeStoreAccounts": "dls",
 33 |   "dataMigrationServices": "dms-",
 34 |   "dBforMySQLServers": "mysql-",
 35 |   "dBforPostgreSQLServers": "psql-",
 36 |   "devicesIotHubs": "iot-",
 37 |   "devicesProvisioningServices": "provs-",
 38 |   "devicesProvisioningServicesCertificates": "pcert-",
 39 |   "documentDBDatabaseAccounts": "cosmos-",
 40 |   "eventGridDomains": "evgd-",
 41 |   "eventGridDomainsTopics": "evgt-",
 42 |   "eventGridEventSubscriptions": "evgs-",
 43 |   "eventHubNamespaces": "evhns-",
 44 |   "eventHubNamespacesEventHubs": "evh-",
 45 |   "hdInsightClustersHadoop": "hadoop-",
 46 |   "hdInsightClustersHbase": "hbase-",
 47 |   "hdInsightClustersKafka": "kafka-",
 48 |   "hdInsightClustersMl": "mls-",
 49 |   "hdInsightClustersSpark": "spark-",
 50 |   "hdInsightClustersStorm": "storm-",
 51 |   "hybridComputeMachines": "arcs-",
 52 |   "insightsActionGroups": "ag-",
 53 |   "insightsComponents": "appi-",
 54 |   "keyVaultVaults": "kv-",
 55 |   "kubernetesConnectedClusters": "arck",
 56 |   "kustoClusters": "dec",
 57 |   "kustoClustersDatabases": "dedb",
 58 |   "logicIntegrationAccounts": "ia-",
 59 |   "logicWorkflows": "logic-",
 60 |   "machineLearningServicesWorkspaces": "mlw-",
 61 |   "managedIdentityUserAssignedIdentities": "id-",
 62 |   "managementManagementGroups": "mg-",
 63 |   "migrateAssessmentProjects": "migr-",
 64 |   "networkApplicationGateways": "agw-",
 65 |   "networkApplicationSecurityGroups": "asg-",
 66 |   "networkAzureFirewalls": "afw-",
 67 |   "networkBastionHosts": "bas-",
 68 |   "networkConnections": "con-",
 69 |   "networkDnsZones": "dnsz-",
 70 |   "networkExpressRouteCircuits": "erc-",
 71 |   "networkFirewallPolicies": "afwp-",
 72 |   "networkFirewallPoliciesWebApplication": "waf",
 73 |   "networkFirewallPoliciesRuleGroups": "wafrg",
 74 |   "networkFrontDoors": "fd-",
 75 |   "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-",
 76 |   "networkLoadBalancersExternal": "lbe-",
 77 |   "networkLoadBalancersInternal": "lbi-",
 78 |   "networkLoadBalancersInboundNatRules": "rule-",
 79 |   "networkLocalNetworkGateways": "lgw-",
 80 |   "networkNatGateways": "ng-",
 81 |   "networkNetworkInterfaces": "nic-",
 82 |   "networkNetworkSecurityGroups": "nsg-",
 83 |   "networkNetworkSecurityGroupsSecurityRules": "nsgsr-",
 84 |   "networkNetworkWatchers": "nw-",
 85 |   "networkPrivateDnsZones": "pdnsz-",
 86 |   "networkPrivateLinkServices": "pl-",
 87 |   "networkPublicIPAddresses": "pip-",
 88 |   "networkPublicIPPrefixes": "ippre-",
 89 |   "networkRouteFilters": "rf-",
 90 |   "networkRouteTables": "rt-",
 91 |   "networkRouteTablesRoutes": "udr-",
 92 |   "networkTrafficManagerProfiles": "traf-",
 93 |   "networkVirtualNetworkGateways": "vgw-",
 94 |   "networkVirtualNetworks": "vnet-",
 95 |   "networkVirtualNetworksSubnets": "snet-",
 96 |   "networkVirtualNetworksVirtualNetworkPeerings": "peer-",
 97 |   "networkVirtualWans": "vwan-",
 98 |   "networkVpnGateways": "vpng-",
 99 |   "networkVpnGatewaysVpnConnections": "vcn-",
100 |   "networkVpnGatewaysVpnSites": "vst-",
101 |   "notificationHubsNamespaces": "ntfns-",
102 |   "notificationHubsNamespacesNotificationHubs": "ntf-",
103 |   "operationalInsightsWorkspaces": "log-",
104 |   "portalDashboards": "dash-",
105 |   "powerBIDedicatedCapacities": "pbi-",
106 |   "purviewAccounts": "pview-",
107 |   "recoveryServicesVaults": "rsv-",
108 |   "resourcesResourceGroups": "rg-",
109 |   "searchSearchServices": "srch-",
110 |   "serviceBusNamespaces": "sb-",
111 |   "serviceBusNamespacesQueues": "sbq-",
112 |   "serviceBusNamespacesTopics": "sbt-",
113 |   "serviceEndPointPolicies": "se-",
114 |   "serviceFabricClusters": "sf-",
115 |   "signalRServiceSignalR": "sigr",
116 |   "sqlManagedInstances": "sqlmi-",
117 |   "sqlServers": "sql-",
118 |   "sqlServersDataWarehouse": "sqldw-",
119 |   "sqlServersDatabases": "sqldb-",
120 |   "sqlServersDatabasesStretch": "sqlstrdb-",
121 |   "storageStorageAccounts": "st",
122 |   "storageStorageAccountsVm": "stvm",
123 |   "storSimpleManagers": "ssimp",
124 |   "streamAnalyticsCluster": "asa-",
125 |   "synapseWorkspaces": "syn",
126 |   "synapseWorkspacesAnalyticsWorkspaces": "synw",
127 |   "synapseWorkspacesSqlPoolsDedicated": "syndp",
128 |   "synapseWorkspacesSqlPoolsSpark": "synsp",
129 |   "timeSeriesInsightsEnvironments": "tsi-",
130 |   "webServerFarms": "plan-",
131 |   "webSitesAppService": "app-",
132 |   "webSitesAppServiceEnvironment": "ase-",
133 |   "webSitesFunctions": "func-",
134 |   "webStaticSites": "stapp-"
135 | }
136 | 


--------------------------------------------------------------------------------
/infra/core/ai/cognitiveservices.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | // Account-level settings. customSubDomainName falls back to the account name.
 5 | param customSubDomainName string = name
 6 | param deployments array = [] // one deployment resource is created per entry (see loop below)
 7 | param kind string = 'OpenAI'
 8 | param publicNetworkAccess string = 'Enabled'
 9 | param sku object = {
10 |   name: 'S0'
11 | }
12 | // Cognitive Services account configured from the parameters above.
13 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = {
14 |   name: name
15 |   location: location
16 |   tags: tags
17 |   kind: kind
18 |   properties: {
19 |     customSubDomainName: customSubDomainName
20 |     publicNetworkAccess: publicNetworkAccess
21 |   }
22 |   sku: sku
23 | }
24 | // Child deployments of the account. Each entry must provide name and model; raiPolicyName and sku are optional.
25 | @batchSize(1) // deploy the loop entries serially rather than in parallel
26 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: {
27 |   parent: account
28 |   name: deployment.name
29 |   properties: {
30 |     model: deployment.model
31 |     raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null // null when the entry has no raiPolicyName
32 |   }
33 |   sku: contains(deployment, 'sku') ? deployment.sku : { // default sku used when the entry has none
34 |     name: 'Standard'
35 |     capacity: 20
36 |   }
37 | }]
38 | // Outputs: the account's endpoint, resource id and name.
39 | output endpoint string = account.properties.endpoint
40 | output id string = account.id
41 | output name string = account.name
42 | 


--------------------------------------------------------------------------------
/infra/core/host/container-app.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | // Container app settings. Only name and imageName have no default value.
 5 | param containerAppsEnvironmentName string = ''
 6 | param containerName string = 'main'
 7 | param containerRegistryName string = ''
 8 | param env array = []
 9 | param secrets array = []
10 | param external bool = true
11 | param imageName string
12 | param keyVaultName string = ''
13 | param managedIdentity bool = !empty(keyVaultName) // defaults to true only when a Key Vault name is supplied
14 | param targetPort int = 80
15 | param allowedOrigins array = []
16 | 
17 | @description('CPU cores allocated to a single container instance, e.g. 0.5')
18 | param containerCpuCoreCount string = '0.5'
19 | 
20 | @description('Memory allocated to a single container instance, e.g. 1Gi')
21 | param containerMemory string = '1.0Gi'
22 | // The container app. It references the existing environment and registry declared further down.
23 | resource app 'Microsoft.App/containerApps@2023-05-01' = {
24 |   name: name
25 |   location: location
26 |   tags: tags
27 |   identity: { type: managedIdentity ? 'SystemAssigned' : 'None' } // system-assigned identity only when managedIdentity is true
28 |   properties: {
29 |     managedEnvironmentId: containerAppsEnvironment.id
30 |     configuration: {
31 |       activeRevisionsMode: 'single'
32 |       ingress: {
33 |         external: external
34 |         targetPort: targetPort
35 |         transport: 'auto'
36 |         corsPolicy: {
37 |           allowedOrigins: empty(allowedOrigins) ? ['*'] : allowedOrigins // any origin is allowed when no list is passed
38 |         }
39 |       }
40 |       secrets: concat(secrets, [ // caller-supplied secrets plus the registry password
41 |         {
42 |           name: 'registry-password'
43 |           value: containerRegistry.listCredentials().passwords[0].value // first password returned by the registry's listCredentials()
44 |         }
45 |       ])
46 |       registries: [
47 |         {
48 |           server: '${containerRegistry.name}.azurecr.io'
49 |           username: containerRegistry.name
50 |           passwordSecretRef: 'registry-password' // refers to the secret appended in `secrets` above
51 |         }
52 |       ]
53 |     }
54 |     template: {
55 |       containers: [
56 |         {
57 |           image: imageName
58 |           name: containerName
59 |           env: env
60 |           resources: {
61 |             cpu: json(containerCpuCoreCount) // the param is declared as a string, so it is parsed with json()
62 |             memory: containerMemory
63 |           }
64 |         }
65 |       ]
66 |       scale: {
67 |         minReplicas: 1
68 |         maxReplicas: 10
69 |       }
70 |     }
71 |   }
72 |   dependsOn: [
73 |     containerRegistry
74 |   ]
75 | }
76 | // Existing resources looked up by name; this file does not create them.
77 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' existing = {
78 |   name: containerAppsEnvironmentName
79 | }
80 | 
81 | // 2022-02-01-preview needed for anonymousPullEnabled
82 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' existing = {
83 |   name: containerRegistryName
84 | }
85 | // Outputs. identityPrincipalId is an empty string when managedIdentity is false.
86 | output identityPrincipalId string = managedIdentity ? app.identity.principalId : ''
87 | output imageName string = imageName
88 | output name string = app.name
89 | output uri string = 'https://${app.properties.configuration.ingress.fqdn}'
90 | 


--------------------------------------------------------------------------------
/infra/core/host/container-apps-environment.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | param logAnalyticsWorkspaceName string
 6 | 
 7 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' = {
 8 |   name: name
 9 |   location: location
10 |   tags: tags
11 |   properties: {
12 |     appLogsConfiguration: {
13 |       destination: 'log-analytics'
14 |       logAnalyticsConfiguration: {
15 |         customerId: logAnalyticsWorkspace.properties.customerId
16 |         sharedKey: logAnalyticsWorkspace.listKeys().primarySharedKey
17 |       }
18 |     }
19 |   }
20 | }
21 | 
22 | resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = {
23 |   name: logAnalyticsWorkspaceName
24 | }
25 | 
26 | output name string = containerAppsEnvironment.name
27 | 


--------------------------------------------------------------------------------
/infra/core/host/container-apps.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | param containerAppsEnvironmentName string = ''
 6 | param containerRegistryName string = ''
 7 | param logAnalyticsWorkspaceName string = ''
 8 | 
 9 | module containerAppsEnvironment 'container-apps-environment.bicep' = {
10 |   name: '${name}-container-apps-environment'
11 |   params: {
12 |     name: containerAppsEnvironmentName
13 |     location: location
14 |     tags: tags
15 |     logAnalyticsWorkspaceName: logAnalyticsWorkspaceName
16 |   }
17 | }
18 | 
19 | module containerRegistry 'container-registry.bicep' = {
20 |   name: '${name}-container-registry'
21 |   params: {
22 |     name: containerRegistryName
23 |     location: location
24 |     tags: tags
25 |   }
26 | }
27 | 
28 | output environmentName string = containerAppsEnvironment.outputs.name
29 | output registryLoginServer string = containerRegistry.outputs.loginServer
30 | output registryName string = containerRegistry.outputs.name
31 | 


--------------------------------------------------------------------------------
/infra/core/host/container-registry.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | param adminUserEnabled bool = true
 6 | param anonymousPullEnabled bool = false
 7 | param dataEndpointEnabled bool = false
 8 | param encryption object = {
 9 |   status: 'disabled'
10 | }
11 | param networkRuleBypassOptions string = 'AzureServices'
12 | param publicNetworkAccess string = 'Enabled'
13 | param sku object = {
14 |   name: 'Basic'
15 | }
16 | param zoneRedundancy string = 'Disabled'
17 | 
18 | @description('The log analytics workspace id used for logging & monitoring')
19 | param workspaceId string = ''
20 | 
21 | // 2022-02-01-preview needed for anonymousPullEnabled
22 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = {
23 |   name: name
24 |   location: location
25 |   tags: tags
26 |   sku: sku
27 |   properties: {
28 |     adminUserEnabled: adminUserEnabled
29 |     anonymousPullEnabled: anonymousPullEnabled
30 |     dataEndpointEnabled: dataEndpointEnabled
31 |     encryption: encryption
32 |     networkRuleBypassOptions: networkRuleBypassOptions
33 |     publicNetworkAccess: publicNetworkAccess
34 |     zoneRedundancy: zoneRedundancy
35 |   }
36 | }
37 | 
38 | // TODO: Update diagnostics to be its own module
39 | // Blocking issue: https://github.com/Azure/bicep/issues/622
40 | // Unable to pass in a `resource` scope or unable to use string interpolation in resource types
41 | resource diagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = if (!empty(workspaceId)) {
42 |   name: 'registry-diagnostics'
43 |   scope: containerRegistry
44 |   properties: {
45 |     workspaceId: workspaceId
46 |     logs: [
47 |       {
48 |         category: 'ContainerRegistryRepositoryEvents'
49 |         enabled: true
50 |       }
51 |       {
52 |         category: 'ContainerRegistryLoginEvents'
53 |         enabled: true
54 |       }
55 |     ]
56 |     metrics: [
57 |       {
58 |         category: 'AllMetrics'
59 |         enabled: true
60 |         timeGrain: 'PT1M'
61 |       }
62 |     ]
63 |   }
64 | }
65 | 
66 | output loginServer string = containerRegistry.properties.loginServer
67 | output name string = containerRegistry.name
68 | 


--------------------------------------------------------------------------------
/infra/core/host/staticwebapp.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | param sku object = {
 6 |   name: 'Free'
 7 |   tier: 'Free'
 8 | }
 9 | 
10 | resource web 'Microsoft.Web/staticSites@2022-03-01' = {
11 |   name: name
12 |   location: location
13 |   tags: tags
14 |   sku: sku
15 |   properties: {
16 |     provider: 'SwaCli'
17 |   }
18 | }
19 | 
20 | output name string = web.name
21 | output uri string = 'https://${web.properties.defaultHostname}'
22 | 


--------------------------------------------------------------------------------
/infra/core/monitor/applicationinsights.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = {
 6 |   name: name
 7 |   location: location
 8 |   tags: tags
 9 |   kind: 'web'
10 |   properties: {
11 |     Application_Type: 'web'
12 |   }
13 | }
14 | 
15 | output connectionString string = applicationInsights.properties.ConnectionString
16 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey
17 | output name string = applicationInsights.name
18 | 


--------------------------------------------------------------------------------
/infra/core/monitor/loganalytics.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = {
 6 |   name: name
 7 |   location: location
 8 |   tags: tags
 9 |   properties: any({
10 |     retentionInDays: 30
11 |     features: {
12 |       searchVersion: 1
13 |     }
14 |     sku: {
15 |       name: 'PerGB2018'
16 |     }
17 |   })
18 | }
19 | 
20 | output id string = logAnalytics.id
21 | output name string = logAnalytics.name
22 | 


--------------------------------------------------------------------------------
/infra/core/monitor/monitoring.bicep:
--------------------------------------------------------------------------------
 1 | param logAnalyticsName string
 2 | param applicationInsightsName string
 3 | param location string = resourceGroup().location
 4 | param tags object = {}
 5 | 
 6 | var useApplicationInsights = !empty(applicationInsightsName)
 7 | 
 8 | module logAnalytics 'loganalytics.bicep' = {
 9 |   name: 'loganalytics'
10 |   params: {
11 |     name: logAnalyticsName
12 |     location: location
13 |     tags: tags
14 |   }
15 | }
16 | 
17 | module applicationInsights 'applicationinsights.bicep' = if (useApplicationInsights) {
18 |   name: 'applicationinsights'
19 |   params: {
20 |     name: applicationInsightsName
21 |     location: location
22 |     tags: tags
23 |   }
24 | }
25 | 
26 | output applicationInsightsConnectionString string =  useApplicationInsights ? applicationInsights.outputs.connectionString : ''
27 | output applicationInsightsInstrumentationKey string = useApplicationInsights ? applicationInsights.outputs.instrumentationKey : ''
28 | output applicationInsightsName string = useApplicationInsights ? applicationInsights.outputs.name : ''
29 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name
30 | 


--------------------------------------------------------------------------------
/infra/core/search/search-services.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | param sku object = {
 6 |   name: 'standard'
 7 | }
 8 | 
 9 | param authOptions object = {}
10 | param semanticSearch string = 'disabled'
11 | 
12 | resource search 'Microsoft.Search/searchServices@2021-04-01-preview' = {
13 |   name: name
14 |   location: location
15 |   tags: tags
16 |   identity: {
17 |     type: 'SystemAssigned'
18 |   }
19 |   properties: {
20 |     authOptions: authOptions
21 |     disableLocalAuth: false
22 |     disabledDataExfiltrationOptions: []
23 |     encryptionWithCmk: {
24 |       enforcement: 'Unspecified'
25 |     }
26 |     hostingMode: 'default'
27 |     networkRuleSet: {
28 |       bypass: 'None'
29 |       ipRules: []
30 |     }
31 |     partitionCount: 1
32 |     publicNetworkAccess: 'Enabled'
33 |     replicaCount: 1
34 |     semanticSearch: semanticSearch
35 |   }
36 |   sku: sku
37 | }
38 | 
39 | output id string = search.id
40 | output endpoint string = 'https://${name}.search.windows.net/'
41 | output name string = search.name
42 | 


--------------------------------------------------------------------------------
/infra/core/security/role.bicep:
--------------------------------------------------------------------------------
 1 | param principalId string
 2 | 
 3 | @allowed([
 4 |   'Device'
 5 |   'ForeignGroup'
 6 |   'Group'
 7 |   'ServicePrincipal'
 8 |   'User'
 9 | ])
10 | param principalType string = 'ServicePrincipal'
11 | param roleDefinitionId string
12 | 
13 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = { // Assigns roleDefinitionId to principalId at the deployment's resource group
14 |   name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId) // Deterministic name derived from subscription, resource group, principal and role
15 |   properties: {
16 |     principalId: principalId
17 |     principalType: principalType
18 |     roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId) // Build a subscription-scoped role definition ID rather than a resource-group-scoped one
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/infra/main.bicep:
--------------------------------------------------------------------------------
  1 | targetScope = 'subscription'
  2 | 
  3 | @minLength(1)
  4 | @maxLength(64)
  5 | @description('Name of the environment which is used to generate a short unique hash used in all resources.')
  6 | param environmentName string
  7 | 
  8 | @minLength(1)
  9 | @description('Primary location for all resources')
 10 | param location string
 11 | 
 12 | param resourceGroupName string = ''
 13 | param frontendName string = 'frontend'
 14 | param backendApiName string = 'backend'
 15 | param backendApiImageName string = ''
 16 | param indexerApiName string = 'indexer'
 17 | param indexerApiImageName string = ''
 18 | 
 19 | // The free tier does not support managed identity (required) or semantic search (optional)
 20 | @allowed(['basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2'])
 21 | param searchServiceSkuName string // Set in main.parameters.json
 22 | param searchIndexName string // Set in main.parameters.json
 23 | 
 24 | @description('Location for the OpenAI resource group')
 25 | @allowed(['australiaeast', 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'japaneast', 'northcentralus', 'swedencentral', 'switzerlandnorth', 'uksouth', 'westeurope'])
 26 | @metadata({
 27 |   azd: {
 28 |     type: 'location'
 29 |   }
 30 | })
 31 | param openAiLocation string // Set in main.parameters.json
 32 | param openAiUrl string = ''
 33 | param openAiSkuName string = 'S0'
 34 | 
 35 | // Location is not relevant here as it's only for the built-in api
 36 | // which is not used here. Static Web App is a global service otherwise
 37 | @description('Location for the Static Web App')
 38 | @allowed(['westus2', 'centralus', 'eastus2', 'westeurope', 'eastasia', 'eastasiastage'])
 39 | @metadata({
 40 |   azd: {
 41 |     type: 'location'
 42 |   }
 43 | })
 44 | param frontendLocation string = 'eastus2'
 45 | 
 46 | param chatGptDeploymentName string // Set in main.parameters.json
 47 | param chatGptDeploymentCapacity int = 30
 48 | param chatGptModelName string = 'gpt-35-turbo'
 49 | param chatGptModelVersion string = '0613'
 50 | param embeddingDeploymentName string = 'embedding'
 51 | param embeddingDeploymentCapacity int = 30
 52 | param embeddingModelName string = 'text-embedding-ada-002'
 53 | 
 54 | @description('Id of the user or app to assign application roles')
 55 | param principalId string = ''
 56 | 
 57 | @description('Use Application Insights for monitoring and performance tracing')
 58 | param useApplicationInsights bool = false
 59 | 
 60 | var abbrs = loadJsonContent('abbreviations.json')
 61 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location))
 62 | var tags = { 'azd-env-name': environmentName }
 63 | var finalOpenAiUrl = empty(openAiUrl) ? 'https://${openAi.outputs.name}.openai.azure.com' : openAiUrl
 64 | 
 65 | // Organize resources in a resource group
 66 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
 67 |   name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}'
 68 |   location: location
 69 |   tags: tags
 70 | }
 71 | 
 72 | // Monitor application with Azure Monitor
 73 | module monitoring './core/monitor/monitoring.bicep' = {
 74 |   name: 'monitoring'
 75 |   scope: resourceGroup
 76 |   params: {
 77 |     location: location
 78 |     tags: tags
 79 |     logAnalyticsName: '${abbrs.operationalInsightsWorkspaces}${resourceToken}'
 80 |     applicationInsightsName: useApplicationInsights ? '${abbrs.insightsComponents}${resourceToken}' : ''
 81 |   }
 82 | }
 83 | 
 84 | // Container apps host (including container registry)
 85 | module containerApps './core/host/container-apps.bicep' = {
 86 |   name: 'container-apps'
 87 |   scope: resourceGroup
 88 |   params: {
 89 |     name: 'containerapps'
 90 |     containerAppsEnvironmentName: '${abbrs.appManagedEnvironments}${resourceToken}'
 91 |     containerRegistryName: '${abbrs.containerRegistryRegistries}${resourceToken}'
 92 |     location: location
 93 |     tags: tags
 94 |     logAnalyticsWorkspaceName: monitoring.outputs.logAnalyticsWorkspaceName
 95 |   }
 96 | }
 97 | 
 98 | // The application frontend
 99 | module frontend './core/host/staticwebapp.bicep' = {
100 |   name: 'frontend'
101 |   scope: resourceGroup
102 |   params: {
103 |     name: !empty(frontendName) ? frontendName : '${abbrs.webStaticSites}web-${resourceToken}'
104 |     location: frontendLocation
105 |     tags: union(tags, { 'azd-service-name': frontendName })
106 |   }
107 | }
108 | 
109 | // The backend API
110 | module backendApi './core/host/container-app.bicep' = {
111 |   name: 'backend-api'
112 |   scope: resourceGroup
113 |   params: {
114 |     name: !empty(backendApiName) ? backendApiName : '${abbrs.appContainerApps}search-${resourceToken}'
115 |     location: location
116 |     tags: union(tags, { 'azd-service-name': backendApiName })
117 |     containerAppsEnvironmentName: containerApps.outputs.environmentName
118 |     containerRegistryName: containerApps.outputs.registryName
119 |     managedIdentity: true
120 |     containerCpuCoreCount: '1.0'
121 |     containerMemory: '2.0Gi'
122 |     secrets: useApplicationInsights ? [
123 |       {
124 |         name: 'appinsights-cs'
125 |         value: monitoring.outputs.applicationInsightsConnectionString
126 |       }
127 |     ] : []
128 |     env: concat([
129 |       {
130 |         name: 'AZURE_OPENAI_CHATGPT_DEPLOYMENT'
131 |         value: chatGptDeploymentName
132 |       }
133 |       {
134 |         name: 'AZURE_OPENAI_CHATGPT_MODEL'
135 |         value: chatGptModelName
136 |       }
137 |       {
138 |         name: 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT'
139 |         value: embeddingDeploymentName
140 |       }
141 |       {
142 |         name: 'AZURE_OPENAI_EMBEDDING_MODEL'
143 |         value: embeddingModelName
144 |       }
145 |       {
146 |         name: 'AZURE_OPENAI_URL'
147 |         value: finalOpenAiUrl
148 |       }
149 |       {
150 |         name: 'AZURE_SEARCH_INDEX'
151 |         value: searchIndexName
152 |       }
153 |       {
154 |         name: 'AZURE_SEARCH_SERVICE'
155 |         value: searchService.outputs.name
156 |       }
157 |     ], useApplicationInsights ? [{
158 |       name: 'APPLICATIONINSIGHTS_CONNECTION_STRING'
159 |       secretRef: 'appinsights-cs'
160 |     }] : [])
161 |     imageName: !empty(backendApiImageName) ? backendApiImageName : 'nginx:latest'
162 |     targetPort: 3000
163 |   }
164 | }
165 | 
166 | // The indexer API
167 | module indexerApi './core/host/container-app.bicep' = {
168 |   name: 'indexer-api'
169 |   scope: resourceGroup
170 |   params: {
171 |     name: !empty(indexerApiName) ? indexerApiName : '${abbrs.appContainerApps}indexer-${resourceToken}'
172 |     location: location
173 |     tags: union(tags, { 'azd-service-name': indexerApiName })
174 |     containerAppsEnvironmentName: containerApps.outputs.environmentName
175 |     containerRegistryName: containerApps.outputs.registryName
176 |     managedIdentity: true
177 |     containerCpuCoreCount: '1.0'
178 |     containerMemory: '2.0Gi'
179 |     secrets: useApplicationInsights ? [
180 |       {
181 |         name: 'appinsights-cs'
182 |         value: monitoring.outputs.applicationInsightsConnectionString
183 |       }
184 |     ] : []
185 |     env: concat([
186 |       {
187 |         name: 'AZURE_OPENAI_CHATGPT_DEPLOYMENT'
188 |         value: chatGptDeploymentName
189 |       }
190 |       {
191 |         name: 'AZURE_OPENAI_CHATGPT_MODEL'
192 |         value: chatGptModelName
193 |       }
194 |       {
195 |         name: 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT'
196 |         value: embeddingDeploymentName
197 |       }
198 |       {
199 |         name: 'AZURE_OPENAI_EMBEDDING_MODEL'
200 |         value: embeddingModelName
201 |       }
202 |       {
203 |         name: 'AZURE_OPENAI_URL'
204 |         value: finalOpenAiUrl
205 |       }
206 |       {
207 |         name: 'AZURE_SEARCH_INDEX'
208 |         value: searchIndexName
209 |       }
210 |       {
211 |         name: 'AZURE_SEARCH_SERVICE'
212 |         value: searchService.outputs.name
213 |       }
214 |     ], useApplicationInsights ? [{
215 |       name: 'APPLICATIONINSIGHTS_CONNECTION_STRING'
216 |       secretRef: 'appinsights-cs'
217 |     }] : [])
218 |     imageName: !empty(indexerApiImageName) ? indexerApiImageName : 'nginx:latest'
219 |     targetPort: 3001
220 |   }
221 | }
222 | 
223 | module openAi 'core/ai/cognitiveservices.bicep' = if (empty(openAiUrl)) { // Only deployed when no existing endpoint is supplied through openAiUrl
224 |   name: 'openai'
225 |   scope: resourceGroup
226 |   params: {
227 |     name: '${abbrs.cognitiveServicesAccounts}${resourceToken}'
228 |     location: openAiLocation
229 |     tags: tags
230 |     sku: {
231 |       name: openAiSkuName
232 |     }
233 |     deployments: [
234 |       {
235 |         name: chatGptDeploymentName
236 |         model: {
237 |           format: 'OpenAI'
238 |           name: chatGptModelName
239 |           version: chatGptModelVersion
240 |         }
241 |         sku: {
242 |           name: 'Standard'
243 |           capacity: chatGptDeploymentCapacity
244 |         }
245 |       }
246 |       {
247 |         name: embeddingDeploymentName
248 |         model: {
249 |           format: 'OpenAI'
250 |           name: embeddingModelName
251 |           version: '2'
252 |         }
253 |         sku: { name: 'Standard', capacity: embeddingDeploymentCapacity } // Same sku shape as the chat deployment above
254 |       }
255 |     ]
256 |   }
257 | }
258 | 
259 | module searchService 'core/search/search-services.bicep' = {
260 |   name: 'search-service'
261 |   scope: resourceGroup
262 |   params: {
263 |     name: 'gptkb-${resourceToken}'
264 |     location: location
265 |     tags: tags
266 |     authOptions: {
267 |       aadOrApiKey: {
268 |         aadAuthFailureMode: 'http401WithBearerChallenge'
269 |       }
270 |     }
271 |     sku: {
272 |       name: searchServiceSkuName
273 |     }
274 |     semanticSearch: 'free'
275 |   }
276 | }
277 | 
278 | 
279 | // USER ROLES
280 | module openAiRoleUser 'core/security/role.bicep' = if (empty(openAiUrl)) {
281 |   scope: resourceGroup
282 |   name: 'openai-role-user'
283 |   params: {
284 |     principalId: principalId
285 |     // Cognitive Services OpenAI User
286 |     roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
287 |     principalType: 'User'
288 |   }
289 | }
290 | 
291 | module searchContribRoleUser 'core/security/role.bicep' = {
292 |   scope: resourceGroup
293 |   name: 'search-contrib-role-user'
294 |   params: {
295 |     principalId: principalId
296 |     // Search Index Data Contributor
297 |     roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7'
298 |     principalType: 'User'
299 |   }
300 | }
301 | 
302 | module searchSvcContribRoleUser 'core/security/role.bicep' = {
303 |   scope: resourceGroup
304 |   name: 'search-svccontrib-role-user'
305 |   params: {
306 |     principalId: principalId
307 |     // Search Service Contributor
308 |     roleDefinitionId: '7ca78c08-252a-4471-8644-bb5ff32d4ba0'
309 |     principalType: 'User'
310 |   }
311 | }
312 | 
313 | // SYSTEM IDENTITIES
314 | module openAiRoleBackendApi 'core/security/role.bicep' = if (empty(openAiUrl)) {
315 |   scope: resourceGroup
316 |   name: 'openai-role-backendapi'
317 |   params: {
318 |     principalId: backendApi.outputs.identityPrincipalId
319 |     // Cognitive Services OpenAI User
320 |     roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
321 |     principalType: 'ServicePrincipal'
322 |   }
323 | }
324 | 
325 | module searchRoleBackendApi 'core/security/role.bicep' = {
326 |   scope: resourceGroup
327 |   name: 'search-role-backendapi'
328 |   params: {
329 |     principalId: backendApi.outputs.identityPrincipalId
330 |     // Search Index Data Reader
331 |     roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f'
332 |     principalType: 'ServicePrincipal'
333 |   }
334 | }
335 | 
336 | module openAiRoleIndexerApi 'core/security/role.bicep' = if (empty(openAiUrl)) {
337 |   scope: resourceGroup
338 |   name: 'openai-role-indexer'
339 |   params: {
340 |     principalId: indexerApi.outputs.identityPrincipalId
341 |     // Cognitive Services OpenAI User
342 |     roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
343 |     principalType: 'ServicePrincipal'
344 |   }
345 | }
346 | 
347 | module searchContribRoleIndexerApi 'core/security/role.bicep' = {
348 |   scope: resourceGroup
349 |   name: 'search-contrib-role-indexer'
350 |   params: {
351 |     principalId: indexerApi.outputs.identityPrincipalId
352 |     // Search Index Data Contributor
353 |     roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7'
354 |     principalType: 'ServicePrincipal'
355 |   }
356 | }
357 | 
358 | module searchSvcContribRoleIndexerApi 'core/security/role.bicep' = {
359 |   scope: resourceGroup
360 |   name: 'search-svccontrib-role-indexer'
361 |   params: {
362 |     principalId: indexerApi.outputs.identityPrincipalId
363 |     // Search Service Contributor
364 |     roleDefinitionId: '7ca78c08-252a-4471-8644-bb5ff32d4ba0'
365 |     principalType: 'ServicePrincipal'
366 |   }
367 | }
368 | 
369 | output AZURE_LOCATION string = location
370 | output AZURE_TENANT_ID string = tenant().tenantId
371 | output AZURE_RESOURCE_GROUP string = resourceGroup.name
372 | 
373 | output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registryLoginServer
374 | output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName
375 | 
376 | output AZURE_OPENAI_URL string = finalOpenAiUrl
377 | output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = chatGptDeploymentName
378 | output AZURE_OPENAI_CHATGPT_MODEL string = chatGptModelName
379 | output AZURE_OPENAI_EMBEDDING_DEPLOYMENT string = embeddingDeploymentName
380 | output AZURE_OPENAI_EMBEDDING_MODEL string = embeddingModelName
381 | 
382 | output AZURE_SEARCH_INDEX string = searchIndexName
383 | output AZURE_SEARCH_SERVICE string = searchService.outputs.name
384 | 
385 | output FRONTEND_URI string = frontend.outputs.uri
386 | output BACKEND_API_URI string = backendApi.outputs.uri
387 | output INDEXER_API_URI string = indexerApi.outputs.uri
388 | 


--------------------------------------------------------------------------------
/infra/main.parameters.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
 3 |   "contentVersion": "1.0.0.0",
 4 |   "parameters": {
 5 |     "environmentName": {
 6 |       "value": "${AZURE_ENV_NAME}"
 7 |     },
 8 |     "resourceGroupName": {
 9 |       "value": "${AZURE_RESOURCE_GROUP}"
10 |     },
11 |     "location": {
12 |       "value": "${AZURE_LOCATION}"
13 |     },
14 |     "principalId": {
15 |       "value": "${AZURE_PRINCIPAL_ID}"
16 |     },
17 |     "openAiLocation": {
18 |       "value": "${AZURE_OPENAI_LOCATION=eastus2}"
19 |     },
20 |     "openAiUrl": {
21 |       "value": "${AZURE_OPENAI_URL}"
22 |     },
23 |     "searchIndexName": {
24 |       "value": "${AZURE_SEARCH_INDEX=kbindex}"
25 |     },
26 |     "searchServiceSkuName": {
27 |       "value": "${AZURE_SEARCH_SERVICE_SKU=standard}"
28 |     },
29 |     "chatGptDeploymentName": {
30 |       "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT=chat}"
31 |     }
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "azure-openai-rag-workshop",
 3 |   "version": "1.0.0",
 4 |   "description": "Create your own ChatGPT with Retrieval-Augmented-Generation",
 5 |   "private": true,
 6 |   "directories": {
 7 |     "doc": "docs"
 8 |   },
 9 |   "scripts": {
10 |     "start": "concurrently \"npm:start:*\" --kill-others",
11 |     "start:frontend": "npm run dev --workspace=frontend",
12 |     "start:backend": "npm run dev --workspace=backend",
13 |     "build": "npm run build -ws --if-present",
14 |     "clean": "npm run clean -ws --if-present",
15 |     "docker:build": "npm run docker:build -ws --if-present",
16 |     "format": "prettier --list-different --write .",
17 |     "lint": "eslint .",
18 |     "lint:fix": "eslint --fix ."
19 |   },
20 |   "repository": {
21 |     "type": "git",
22 |     "url": "https://github.com/Azure-Samples/azure-openai-rag-workshop.git"
23 |   },
24 |   "homepage": "https://github.com/Azure-Samples/azure-openai-rag-workshop",
25 |   "bugs": {
26 |     "url": "https://github.com/Azure-Samples/azure-openai-rag-workshop/issues"
27 |   },
28 |   "keywords": [],
29 |   "author": "Microsoft",
30 |   "license": "MIT",
31 |   "workspaces": [
32 |     "src/*"
33 |   ],
34 |   "devDependencies": {
35 |     "@typescript-eslint/eslint-plugin": "^6.7.0",
36 |     "@typescript-eslint/parser": "^6.7.0",
37 |     "concurrently": "^8.2.1",
38 |     "eslint": "^8.49.0",
39 |     "eslint-plugin-import": "^2.28.1",
40 |     "eslint-plugin-n": "^16.1.0",
41 |     "eslint-plugin-unicorn": "^48.0.1",
42 |     "prettier": "^3.0.3",
43 |     "rimraf": "^5.0.5",
44 |     "typescript": "*"
45 |   },
46 |   "engines": {
47 |     "node": ">=18",
48 |     "npm": ">=9"
49 |   },
50 |   "prettier": {
51 |     "tabWidth": 2,
52 |     "semi": true,
53 |     "singleQuote": true,
54 |     "printWidth": 120,
55 |     "bracketSpacing": true
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/scripts/index-data.ps1:
--------------------------------------------------------------------------------
 1 | $scriptPath = $MyInvocation.MyCommand.Path
 2 | cd $scriptPath/../..  # Move two levels up from this script's own path
 3 | 
 4 | Write-Host "Loading azd .env file from current environment"
 5 | $output = azd env get-values
 6 | 
 7 | foreach ($line in $output) {
 8 |   if (!$line.Contains('=')) {
 9 |     continue
10 |   }
11 | 
12 |   $name, $value = $line -split '=', 2  # Split on the first '=' only so values that contain '=' stay intact
13 |   $value = $value -replace '^\"|\"$'  # Strip one leading and one trailing double quote
14 |   [Environment]::SetEnvironmentVariable($name, $value)
15 | }
16 | 
17 | Write-Host 'Installing dependencies and building CLI'
18 | npm ci
19 | npm run build --workspace=indexer
20 | 
21 | Write-Host 'Running "index-files" CLI tool'
22 | $files = Get-Item "data/*.pdf"
23 | npx index-files --wait --indexer-url "$env:INDEXER_API_URI" --index-name "$env:AZURE_SEARCH_INDEX" $files
24 | 


--------------------------------------------------------------------------------
/scripts/index-data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 1  # Stop here rather than run npm from the wrong directory
 3 | 
 4 | echo "Loading azd .env file from current environment"
 5 | export $(azd env get-values | xargs)  # Export each KEY=VALUE pair printed by azd into this shell
 6 | 
 7 | echo 'Installing dependencies and building CLI'
 8 | npm ci
 9 | npm run build --workspace=indexer
10 | 
11 | echo 'Running "index-files" CLI tool'
12 | npx index-files \
13 |   --wait \
14 |   --indexer-url "${INDEXER_API_URI}" \
15 |   --index-name "${AZURE_SEARCH_INDEX}" \
16 |   ./data/*.pdf
17 | 


--------------------------------------------------------------------------------
/src/backend/README.md:
--------------------------------------------------------------------------------
 1 | # Chat backend
 2 | 
 3 | This project was bootstrapped with [Fastify-CLI](https://www.npmjs.com/package/fastify-cli).
 4 | 
 5 | ## Available Scripts
 6 | 
 7 | In the project directory, you can run:
 8 | 
 9 | ### `npm run dev`
10 | 
11 | To start the app in dev mode.\
12 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
13 | 
14 | ### `npm run build`
15 | 
16 | To build the app for production to the `dist` folder.
17 | 
18 | ### `npm start`
19 | 
20 | To start the app in production mode (run `npm run build` first).
21 | 
22 | ## Learn More
23 | 
24 | To learn Fastify, check out the [Fastify documentation](https://www.fastify.io/docs/latest/).
25 | 


--------------------------------------------------------------------------------
/src/backend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "backend",
 3 |   "version": "1.0.0",
 4 |   "description": "ChatGPT + RAG backend service",
 5 |   "private": true,
 6 |   "type": "module",
 7 |   "exports": "./src/app.ts",
 8 |   "scripts": {
 9 |     "start": "fastify start -l info dist/app.js",
10 |     "build": "tsc",
11 |     "watch": "tsc -w",
12 |     "dev": "npm run build && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"npm:watch\" \"npm:dev:start\"",
13 |     "dev:start": "fastify start --pretty-logs --ignore-watch=.ts$ -w -l debug dist/app.js",
14 |     "docker:build": "docker build --tag backend --file ./Dockerfile ../..",
15 |     "docker:run": "docker run --rm --publish 3000:3000 --env-file ../../.env backend",
16 |     "clean": "npx rimraf dist"
17 |   },
18 |   "dependencies": {
19 |     "@azure/identity": "4.0.0",
20 |     "@azure/search-documents": "12.0.0-beta.3",
21 |     "@dqbd/tiktoken": "^1.0.7",
22 |     "@fastify/autoload": "^5.0.0",
23 |     "@fastify/cors": "^8.3.0",
24 |     "@fastify/sensible": "^5.0.0",
25 |     "@fastify/type-provider-json-schema-to-ts": "^2.2.2",
26 |     "dotenv": "^16.3.1",
27 |     "fastify": "^4.0.0",
28 |     "fastify-cli": "^5.7.0",
29 |     "fastify-plugin": "^4.0.0",
30 |     "langchain": "^0.0.181"
31 |   },
32 |   "devDependencies": {
33 |     "@types/node": "^18.0.0",
34 |     "concurrently": "^8.2.0",
35 |     "fastify-tsconfig": "^2.0.0",
36 |     "ts-node": "^10.9.1",
37 |     "typescript": "^5.1.6"
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/backend/src/app.ts:
--------------------------------------------------------------------------------
 1 | import path, { join } from 'node:path';
 2 | import { fileURLToPath } from 'node:url';
 3 | import { type FastifyPluginAsync } from 'fastify';
 4 | import AutoLoad, { type AutoloadPluginOptions } from '@fastify/autoload';
 5 | import cors from '@fastify/cors';
 6 | 
 7 | export type AppOptions = {
 8 |   // Add custom application options here; they are combined with the (all optional) autoload options.
 9 | } & Partial<AutoloadPluginOptions>;
10 | 
11 | // Default options, exported below alongside the plugin. Pass --options via CLI arguments to enable them.
12 | const options: AppOptions = {};
13 | 
14 | const __filename = fileURLToPath(import.meta.url); // File system path of this module
15 | const __dirname = path.dirname(__filename); // Folder of this module; used below to locate `plugins` and `routes`
16 | 
17 | const app: FastifyPluginAsync<AppOptions> = async (fastify, options_): Promise<void> => {
18 |   // Application-specific setup goes here.
19 | 
20 |   fastify.register(cors);
21 | 
22 |   // The autoload wiring below comes from the scaffold; leave it as is.
23 | 
24 |   // Folders to autoload, in registration order:
25 |   // - `plugins`: support plugins reused throughout the application,
26 |   // - `routes`: plugins in which the application routes are defined.
27 |   const autoloadFolders = ['plugins', 'routes'];
28 |   for (const folder of autoloadFolders) {
29 |     const dir = join(__dirname, folder);
30 | 
31 |     // Each folder gets its own AutoLoad registration and receives the
32 |     // options this plugin was given.
33 |     fastify.register(AutoLoad, {
34 |       dir,
35 |       options: options_,
36 |     });
37 |   }
38 | };
39 | 
40 | export default app;
41 | export { app, options };
42 | 


--------------------------------------------------------------------------------
/src/backend/src/lib/index.ts:
--------------------------------------------------------------------------------
1 | export * from './message-builder.js';
2 | export * from './models.js';
3 | 


--------------------------------------------------------------------------------
/src/backend/src/lib/message-builder.ts:
--------------------------------------------------------------------------------
 1 | import { encoding_for_model, type TiktokenModel } from '@dqbd/tiktoken';
 2 | import { type BaseMessage, AIMessage, HumanMessage, SystemMessage } from 'langchain/schema';
 3 | import { type Message, type MessageRole } from './models.js';
 4 | 
 5 | export class MessageBuilder {
 6 |   messages: Message[];
 7 |   model: string;
 8 |   tokens: number;
 9 | 
10 |   /**
11 |    * A class for building and managing messages in a chat conversation.
12 |    * @param {string} systemContent The initial system message content.
13 |    * @param {string} chatgptModel The name of the ChatGPT model.
14 |    */
15 |   constructor(systemContent: string, chatgptModel: string) {
16 |     this.model = chatgptModel;
17 |     this.messages = [{ role: 'system', content: systemContent }];
18 |     this.tokens = this.getTokenCountFromMessages(this.messages[0], this.model);
19 |   }
20 | 
21 |   /**
22 |    * Insert a new message into the conversation and add its tokens to the running total.
23 |    * @param {MessageRole} role The role of the message sender.
24 |    * @param {string} content The content of the message.
25 |    * @param {number} index The index at which to insert the message.
26 |    */
27 |   appendMessage(role: MessageRole, content: string, index = 1) {
28 |     // Count the message itself: `this.messages[index]` is undefined when `index` is out of range
29 |     const message: Message = { role, content };
30 |     this.messages.splice(index, 0, message);
31 |     this.tokens += this.getTokenCountFromMessages(message, this.model);
32 |   }
33 | 
34 |   /**
35 |    * Get the messages in the conversation in LangChain format.
36 |    * @returns {BaseMessage[]} The messages.
37 |    */
38 |   getMessages(): BaseMessage[] {
39 |     return this.messages.map(({ role, content }) => {
40 |       if (role === 'system') return new SystemMessage(content);
41 |       if (role === 'assistant') return new AIMessage(content);
42 |       return new HumanMessage(content);
43 |     });
44 |   }
45 | 
46 |   /**
47 |    * Calculate the number of tokens required to encode a message.
48 |    * @param {Message} message The message to encode.
49 |    * @param {string} model The name of the model to use for encoding.
50 |    * @returns {number} The total number of tokens required to encode the message.
51 |    * @example
52 |    * getTokenCountFromMessages({ role: 'user', content: 'Hello, how are you?' }, 'gpt-3.5-turbo');
53 |    * // output: 11
54 |    */
55 |   private getTokenCountFromMessages(message: Message, model: string): number {
56 |     // GPT3.5 tiktoken model name is slightly different than Azure OpenAI model name
57 |     const tiktokenModel = model.replace('gpt-35', 'gpt-3.5') as TiktokenModel;
58 |     const encoder = encoding_for_model(tiktokenModel);
59 |     try {
60 |       let tokens = 2; // For "role" and "content" keys
61 |       for (const value of Object.values(message)) {
62 |         tokens += encoder.encode(value).length;
63 |       }
64 |       return tokens;
65 |     } finally {
66 |       // The encoder must be released explicitly, even when encoding throws
67 |       encoder.free();
68 |     }
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/backend/src/lib/models.ts:
--------------------------------------------------------------------------------
 1 | export type MessageRole = 'system' | 'user' | 'assistant'; // Who a chat message comes from
 2 | 
 3 | export interface Message { // A single message of a chat conversation
 4 |   role: MessageRole; // Sender of the message
 5 |   content: string; // Text of the message
 6 | }
 7 | 
 8 | export type ChatResponseMessage = Message & { // Message shape used in chat responses: a Message plus optional extras
 9 |   context?: Record<string, any> & { // Free-form object; only the two keys below are typed
10 |     data_points?: string[]; // presumably the retrieved sources backing the answer — TODO confirm
11 |     thoughts?: string; // presumably a description of how the answer was produced — TODO confirm
12 |   };
13 |   session_state?: Record<string, any>; // Free-form object; its content is not defined in this file
14 | };
15 | 
16 | export interface ChatResponse { // Complete (non-streamed) chat response
17 |   choices: Array<{
18 |     index: number; // Position of this choice in `choices`
19 |     message: ChatResponseMessage; // The full response message
20 |   }>;
21 |   object: 'chat.completion'; // Literal tag telling this type apart from ChatResponseChunk
22 | }
23 | 
24 | export interface ChatResponseChunk { // One piece of a streamed chat response
25 |   choices: Array<{
26 |     index: number; // Position of this choice in `choices`
27 |     delta: Partial<ChatResponseMessage>; // Any subset of the response message fields
28 |     finish_reason: string | null; // presumably null until the last chunk — TODO confirm
29 |   }>;
30 |   object: 'chat.completion.chunk'; // Literal tag telling this type apart from ChatResponse
31 | }
32 | 


--------------------------------------------------------------------------------
/src/backend/src/plugins/README.md:
--------------------------------------------------------------------------------
 1 | # Plugins Folder
 2 | 
 3 | Plugins define behavior that is common to all the routes in your
 4 | application. Authentication, caching, templates, and all the other
 5 | cross-cutting concerns should be handled by plugins placed in this folder.
 6 | 
 7 | Files in this folder are typically defined through the
 8 | [`fastify-plugin`](https://github.com/fastify/fastify-plugin) module,
 9 | making them non-encapsulated. They can define decorators and set hooks
10 | that will then be used in the rest of your application.
11 | 
12 | Check out:
13 | 
14 | - [The hitchhiker's guide to plugins](https://www.fastify.io/docs/latest/Guides/Plugins-Guide/)
15 | - [Fastify decorators](https://www.fastify.io/docs/latest/Reference/Decorators/).
16 | - [Fastify lifecycle](https://www.fastify.io/docs/latest/Reference/Lifecycle/).
17 | 


--------------------------------------------------------------------------------
/src/backend/src/plugins/chat.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import { ChatOpenAI, type OpenAIChatInput } from 'langchain/chat_models/openai';
 3 | import { OpenAIEmbeddings, type OpenAIEmbeddingsParams } from 'langchain/embeddings/openai';
 4 | import { type Message, MessageBuilder, type ChatResponse, type ChatResponseChunk } from '../lib/index.js';
 5 | 
 6 | export class ChatService { // Chat service; the plugin below exposes an instance as `fastify.chat`
 7 |   tokenLimit: number = 4000; // NOTE(review): not read anywhere in this file yet; presumably a token budget for the prompt — confirm
 8 | 
 9 |   constructor(
10 |     private searchClient: SearchClient<any>, // NOTE(review): `SearchClient` is not imported in this file; presumably from '@azure/search-documents' — add the import
11 |     private chatClient: (options?: Partial<OpenAIChatInput>) => ChatOpenAI, // Factory returning a chat model for the given options
12 |     private embeddingsClient: (options?: Partial<OpenAIEmbeddingsParams>) => OpenAIEmbeddings, // Factory returning an embeddings client for the given options
13 |     private chatGptModel: string, // Meant to receive `config.azureOpenAiChatGptModel` (see the plugin below)
14 |     private embeddingModel: string, // Meant to receive `config.azureOpenAiEmbeddingModel`
15 |     private sourcePageField: string, // Meant to receive `config.kbFieldsSourcePage`
16 |     private contentField: string, // Meant to receive `config.kbFieldsContent`
17 |   ) {}
18 | 
19 |   async run(messages: Message[]): Promise<ChatResponse> { // NOTE(review): returns nothing until the TODO below is implemented
20 | 
21 |     // TODO: implement Retrieval Augmented Generation (RAG) here
22 | 
23 |   }
24 | }
25 | 
26 | export default fp( // Fastify plugin that creates the chat service and attaches it to the instance
27 |   async (fastify, options) => {
28 |     const config = fastify.config; // Set by the `config` plugin, declared as a dependency below
29 | 
30 |     // TODO: initialize clients here
31 | 
32 |     const chatService = new ChatService( // NOTE(review): the constructor declares seven parameters; none are passed until the list below is uncommented
33 |       /*
34 |       searchClient,
35 |       chatClient,
36 |       embeddingsClient,
37 |       config.azureOpenAiChatGptModel,
38 |       config.azureOpenAiEmbeddingModel,
39 |       config.kbFieldsSourcePage,
40 |       config.kbFieldsContent,
41 |       */
42 |     );
43 | 
44 |     fastify.decorate('chat', chatService); // Makes the service available as `fastify.chat`
45 |   },
46 |   {
47 |     name: 'chat',
48 |     dependencies: ['config'], // `fastify.config` is read above, so the `config` plugin is required
49 |   },
50 | );
51 | 
52 | // Type augmentation: properties added with .decorate must also be declared here for TypeScript
53 | declare module 'fastify' {
54 |   export interface FastifyInstance {
55 |     chat: ChatService; // Added by `fastify.decorate('chat', ...)` in the plugin above
56 |   }
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/src/backend/src/plugins/config.ts:
--------------------------------------------------------------------------------
 1 | import process from 'node:process';
 2 | import path from 'node:path';
 3 | import * as dotenv from 'dotenv';
 4 | import fp from 'fastify-plugin';
 5 | 
 6 | export interface AppConfig { // Application settings, read from environment variables by the plugin below
 7 |   azureSearchService: string; // AZURE_SEARCH_SERVICE (no default, must be set)
 8 |   azureSearchIndex: string; // AZURE_SEARCH_INDEX (default: 'kbindex')
 9 |   azureOpenAiUrl: string; // AZURE_OPENAI_URL (no default, must be set)
10 |   azureOpenAiChatGptDeployment: string; // AZURE_OPENAI_CHATGPT_DEPLOYMENT (default: 'chat')
11 |   azureOpenAiChatGptModel: string; // AZURE_OPENAI_CHATGPT_MODEL (default: 'gpt-35-turbo')
12 |   azureOpenAiEmbeddingDeployment: string; // AZURE_OPENAI_EMBEDDING_DEPLOYMENT (default: 'embedding')
13 |   azureOpenAiEmbeddingModel: string; // AZURE_OPENAI_EMBEDDING_MODEL (default: 'text-embedding-ada-002')
14 |   kbFieldsContent: string; // KB_FIELDS_CONTENT (default: 'content')
15 |   kbFieldsSourcePage: string; // KB_FIELDS_SOURCEPAGE (default: 'sourcepage')
16 | }
17 | 
18 | const camelCaseToUpperSnakeCase = (s: string) => s.replace(/[A-Z]/g, '_$&').toUpperCase(); // E.g. 'azureSearchIndex' -> 'AZURE_SEARCH_INDEX'
19 | 
20 | export default fp(
21 |   async (fastify, options) => {
22 |     // The .env file is looked up two levels above the current working directory
23 |     const environmentPath = path.resolve(process.cwd(), '../../.env');
24 |     console.log(`Loading .env config from ${environmentPath}...`);
25 |     dotenv.config({ path: environmentPath });
26 | 
27 |     const { env } = process;
28 |     const config: AppConfig = {
29 |       azureSearchService: env.AZURE_SEARCH_SERVICE || '',
30 |       azureSearchIndex: env.AZURE_SEARCH_INDEX || 'kbindex',
31 |       azureOpenAiUrl: env.AZURE_OPENAI_URL || '',
32 |       azureOpenAiChatGptDeployment: env.AZURE_OPENAI_CHATGPT_DEPLOYMENT || 'chat',
33 |       azureOpenAiChatGptModel: env.AZURE_OPENAI_CHATGPT_MODEL || 'gpt-35-turbo',
34 |       azureOpenAiEmbeddingDeployment: env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT || 'embedding',
35 |       azureOpenAiEmbeddingModel: env.AZURE_OPENAI_EMBEDDING_MODEL || 'text-embedding-ada-002',
36 |       kbFieldsContent: env.KB_FIELDS_CONTENT || 'content',
37 |       kbFieldsSourcePage: env.KB_FIELDS_SOURCEPAGE || 'sourcepage',
38 |     };
39 | 
40 |     // Fail on the first setting that ended up empty
41 |     const missingKey = Object.keys(config).find((key) => !config[key as keyof AppConfig]);
42 |     if (missingKey !== undefined) {
43 |       const variableName = camelCaseToUpperSnakeCase(missingKey).replace('OPEN_AI', 'OPENAI');
44 |       const message = `${variableName} environment variable must be set`;
45 |       fastify.log.error(message);
46 |       throw new Error(message);
47 |     }
48 | 
49 |     fastify.decorate('config', config);
50 |   },
51 |   {
52 |     name: 'config',
53 |   },
54 | );
55 | 
56 | // Type augmentation: properties added with .decorate must also be declared here for TypeScript
57 | declare module 'fastify' {
58 |   export interface FastifyInstance {
59 |     config: AppConfig; // Added by `fastify.decorate('config', ...)` in the plugin above
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/backend/src/plugins/sensible.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import sensible, { type SensibleOptions } from '@fastify/sensible';
 3 | 
 4 | /**
 5 |  * This plugin registers @fastify/sensible, which adds some utilities to handle HTTP errors.
 6 |  * @see https://github.com/fastify/fastify-sensible
 7 |  */
 8 | export default fp<SensibleOptions>(async (fastify) => {
 9 |   fastify.register(sensible);
10 | });
11 | 


--------------------------------------------------------------------------------
/src/backend/src/routes/README.md:
--------------------------------------------------------------------------------
 1 | # Routes Folder
 2 | 
 3 | Routes define endpoints within your application. Fastify provides an
 4 | easy path to a microservice architecture; in the future you might want
 5 | to independently deploy some of those.
 6 | 
 7 | In this folder you should define all the routes that define the endpoints
 8 | of your web application.
 9 | Each service is a [Fastify
10 | plugin](https://www.fastify.io/docs/latest/Reference/Plugins/), it is
11 | encapsulated (it can have its own independent plugins) and it is
12 | typically stored in a file; be careful to group your routes logically,
13 | e.g. all `/users` routes in a `users.js` file. We have added
14 | a `root.js` file for you with a '/' root added.
15 | 
16 | If a single file becomes too large, create a folder and add an `index.js` file there:
17 | this file must be a Fastify plugin, and it will be loaded automatically
18 | by the application. You can now add as many files as you want inside that folder.
19 | In this way you can create complex routes within a single monolith,
20 | and eventually extract them.
21 | 
22 | If you need to share functionality between routes, place that
23 | functionality into the `plugins` folder, and share it via
24 | [decorators](https://www.fastify.io/docs/latest/Reference/Decorators/).
25 | 


--------------------------------------------------------------------------------
/src/backend/src/routes/root.ts:
--------------------------------------------------------------------------------
 1 | import { type FastifyReply, type FastifyPluginAsync } from 'fastify';
 2 | 
 3 | const root: FastifyPluginAsync = async (fastify, options): Promise<void> => {
 4 |   // Basic endpoint to check that the server is up and running
 5 |   const serverUp = async () => ({ message: 'server up' });
 6 |   fastify.get('/', serverUp);
 7 | 
 8 |   // TODO: create /chat endpoint
 9 | };
10 | 
11 | export default root;
12 | 
13 | 


--------------------------------------------------------------------------------
/src/backend/test.http:
--------------------------------------------------------------------------------
 1 | ##################################################################
 2 | # VS Code with REST Client extension is needed to use this file.
 3 | # Download at: https://aka.ms/vscode/rest-client
 4 | ##################################################################
 5 | 
 6 | @api_host = http://localhost:3000
 7 | 
 8 | # Chat with the bot
 9 | POST {{api_host}}/chat
10 | Content-Type: application/json
11 | 
12 | {
13 |   "messages": [{
14 |     "content": "How to search and book rentals?",
15 |     "role": "user"
16 |   }]
17 | }
18 | 
19 | ###
20 | 
21 | # Chat with the bot using streaming
22 | POST {{api_host}}/chat
23 | Content-Type: application/json
24 | 
25 | {
26 |   "messages": [{
27 |     "content": "How to search and book rentals?",
28 |     "role": "user"
29 |   }],
30 |   "stream": true
31 | }
32 | 


--------------------------------------------------------------------------------
/src/backend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "fastify-tsconfig",
 3 |   "compilerOptions": {
 4 |     "outDir": "dist",
 5 |     "module": "esnext",
 6 |     "moduleResolution": "node",
 7 |     "sourceMap": true,
 8 |     "esModuleInterop": true,
 9 |     "noUnusedLocals": false,
10 |     "lib": ["esnext"]
11 |   },
12 |   "include": ["src/**/*.ts"]
13 | }
14 | 


--------------------------------------------------------------------------------
/src/frontend/.lintstagedrc:
--------------------------------------------------------------------------------
1 | {
2 |   "*.{js,jsx,ts,tsx}": ["eslint --fix", "lit-analyzer"],
3 |   "*": ["prettier --ignore-unknown --write"]
4 | }
5 | 


--------------------------------------------------------------------------------
/src/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # Chat frontend
 2 | 
 3 | This project uses [Vite](https://vitejs.dev/) as a frontend build tool, and [Lit](https://lit.dev/) as a web components library.
 4 | 
 5 | ## Available Scripts
 6 | 
 7 | In the project directory, you can run:
 8 | 
 9 | ### `npm run dev`
10 | 
11 | To start the app in dev mode.\
12 | Open [http://localhost:8000](http://localhost:8000) to view it in the browser.
13 | 
14 | ### `npm run build`
15 | 
16 | To build the app for production to the `dist` folder.
17 | 


--------------------------------------------------------------------------------
/src/frontend/assets/lightbulb.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M11.25 6.75073C11.25 6.33652 11.5858 6.00073 12 6.00073C12.4142 6.00073 12.75 6.33652 12.75 6.75073V8.25076C12.75 8.66498 12.4142 9.00076 12 9.00076C11.5858 9.00076 11.25 8.66498 11.25 8.25076V6.75073ZM16.2803 8.21607C15.9874 7.92318 15.5126 7.92318 15.2197 8.21607L14.159 9.27675C13.8661 9.56964 13.8661 10.0445 14.159 10.3374C14.4519 10.6303 14.9268 10.6303 15.2197 10.3374L16.2803 9.27673C16.5732 8.98384 16.5732 8.50896 16.2803 8.21607ZM8.78032 8.21607C8.48743 7.92318 8.01255 7.92318 7.71966 8.21607C7.42677 8.50896 7.42677 8.98384 7.71966 9.27673L8.78034 10.3374C9.07324 10.6303 9.54811 10.6303 9.841 10.3374C10.1339 10.0445 10.1339 9.56964 9.841 9.27675L8.78032 8.21607ZM12 2.00098C16.0041 2.00098 19.25 5.24691 19.25 9.25098C19.25 11.347 18.3493 13.2707 16.5869 14.9929C16.51 15.0681 16.4504 15.1586 16.4118 15.2582L16.3804 15.3605L15.2493 20.2561C15.0266 21.22 14.2035 21.9183 13.2302 21.993L13.057 21.9996H10.9433C9.95374 21.9996 9.08791 21.3545 8.79629 20.4228L8.75088 20.2555L7.62132 15.3607C7.58904 15.2208 7.51728 15.0931 7.41456 14.9928C5.73515 13.3526 4.83778 11.5297 4.75613 9.54923L4.75 9.25098L4.75388 9.01166C4.88014 5.11837 8.07601 2.00098 12 2.00098ZM14.115 18.499H9.884L10.2125 19.9182C10.2831 20.2245 10.5357 20.4504 10.8401 20.4925L10.9433 20.4996H13.057C13.3713 20.4996 13.6481 20.3044 13.7577 20.0174L13.7878 19.9184L14.115 18.499ZM12 3.50098C8.89821 3.50098 6.37006 5.95699 6.25415 9.03042L6.25 9.25098L6.25672 9.52799C6.33286 11.0913 7.05722 12.5471 8.46262 13.9197C8.72675 14.1777 8.92265 14.496 9.03422 14.846L9.08291 15.0235L9.538 16.999H11.25V10.7503C11.25 10.3361 11.5858 10.0003 12 10.0003C12.4142 10.0003 12.75 10.3361 12.75 10.7503V16.999H14.461L14.9189 15.0228C15.0019 14.6634 15.1718 14.3309 15.4124 14.0539L15.5386 13.9201C16.9432 12.5475 17.6672 11.0916 17.7433 9.52803L17.75 9.25098L17.7458 9.03042C17.6299 5.95699 15.1018 3.50098 12 3.50098Z"/></svg>


--------------------------------------------------------------------------------
/src/frontend/assets/new-chat.svg:
--------------------------------------------------------------------------------
1 | <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 2C17.5228 2 22 6.47715 22 12C22 12.2628 21.9899 12.5232 21.97 12.7809C21.5319 12.3658 21.0361 12.0111 20.4958 11.73C20.3532 7.16054 16.6041 3.5 12 3.5C7.30558 3.5 3.5 7.30558 3.5 12C3.5 13.4696 3.87277 14.8834 4.57303 16.1375L4.72368 16.4072L3.61096 20.3914L7.59755 19.2792L7.86709 19.4295C9.04305 20.0852 10.3592 20.4531 11.73 20.4958C12.0111 21.0361 12.3658 21.5319 12.7809 21.97C12.5232 21.9899 12.2628 22 12 22C10.3817 22 8.81782 21.6146 7.41286 20.888L3.58704 21.9553C2.92212 22.141 2.23258 21.7525 2.04691 21.0876C1.98546 20.8676 1.98549 20.6349 2.04695 20.4151L3.11461 16.5922C2.38637 15.186 2 13.6203 2 12C2 6.47715 6.47715 2 12 2ZM23 17.5C23 14.4624 20.5376 12 17.5 12C14.4624 12 12 14.4624 12 17.5C12 20.5376 14.4624 23 17.5 23C20.5376 23 23 20.5376 23 17.5ZM18.0006 18L18.0011 20.5035C18.0011 20.7797 17.7773 21.0035 17.5011 21.0035C17.225 21.0035 17.0011 20.7797 17.0011 20.5035L17.0006 18H14.4956C14.2197 18 13.9961 17.7762 13.9961 17.5C13.9961 17.2239 14.2197 17 14.4956 17H17.0005L17 14.4993C17 14.2231 17.2239 13.9993 17.5 13.9993C17.7761 13.9993 18 14.2231 18 14.4993L18.0005 17H20.4966C20.7725 17 20.9961 17.2239 20.9961 17.5C20.9961 17.7762 20.7725 18 20.4966 18H18.0006Z"></path></svg>


--------------------------------------------------------------------------------
/src/frontend/assets/question.svg:
--------------------------------------------------------------------------------
1 | <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22C10.3817 22 8.81782 21.6146 7.41286 20.888L3.58704 21.9553C2.92212 22.141 2.23258 21.7525 2.04691 21.0876C1.98546 20.8676 1.98549 20.6349 2.04695 20.4151L3.11461 16.5922C2.38637 15.186 2 13.6203 2 12C2 6.47715 6.47715 2 12 2ZM12 3.5C7.30558 3.5 3.5 7.30558 3.5 12C3.5 13.4696 3.87277 14.8834 4.57303 16.1375L4.72368 16.4072L3.61096 20.3914L7.59755 19.2792L7.86709 19.4295C9.12006 20.1281 10.5322 20.5 12 20.5C16.6944 20.5 20.5 16.6944 20.5 12C20.5 7.30558 16.6944 3.5 12 3.5ZM12 15.5C12.5523 15.5 13 15.9477 13 16.5C13 17.0523 12.5523 17.5 12 17.5C11.4477 17.5 11 17.0523 11 16.5C11 15.9477 11.4477 15.5 12 15.5ZM12 6.75C13.5188 6.75 14.75 7.98122 14.75 9.5C14.75 10.5108 14.4525 11.074 13.6989 11.8586L13.5303 12.0303C12.9084 12.6522 12.75 12.9163 12.75 13.5C12.75 13.9142 12.4142 14.25 12 14.25C11.5858 14.25 11.25 13.9142 11.25 13.5C11.25 12.4892 11.5475 11.926 12.3011 11.1414L12.4697 10.9697C13.0916 10.3478 13.25 10.0837 13.25 9.5C13.25 8.80964 12.6904 8.25 12 8.25C11.3528 8.25 10.8205 8.74187 10.7565 9.37219L10.75 9.5C10.75 9.91421 10.4142 10.25 10 10.25C9.58579 10.25 9.25 9.91421 9.25 9.5C9.25 7.98122 10.4812 6.75 12 6.75Z"></path></svg>


--------------------------------------------------------------------------------
/src/frontend/assets/send.svg:
--------------------------------------------------------------------------------
1 | <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M5.69362 11.9997L2.29933 3.2715C2.0631 2.66403 2.65544 2.08309 3.2414 2.28959L3.33375 2.32885L21.3337 11.3288C21.852 11.588 21.8844 12.2975 21.4309 12.6129L21.3337 12.6705L3.33375 21.6705C2.75077 21.962 2.11746 21.426 2.2688 20.8234L2.29933 20.7278L5.69362 11.9997L2.29933 3.2715L5.69362 11.9997ZM4.4021 4.54007L7.01109 11.2491L13.6387 11.2497C14.0184 11.2497 14.3322 11.5318 14.3818 11.8979L14.3887 11.9997C14.3887 12.3794 14.1065 12.6932 13.7404 12.7428L13.6387 12.7497L7.01109 12.7491L4.4021 19.4593L19.3213 11.9997L4.4021 4.54007Z"></path></svg>


--------------------------------------------------------------------------------
/src/frontend/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width,initial-scale=1" />
 6 |     <meta name="description" content="ChatGPT with Enterprise Data" />
 7 |     <link rel="icon" type="image/x-icon" href="/favicon.ico" />
 8 |     <title>ChatGPT with Enterprise Data</title>
 9 |     <style>
10 |       :root {
11 |         --azc-primary: #06b;
12 |         --azc-bg: #eee;
13 |         --azc-border-radius: 16px;
14 |       }
15 |       html,
16 |       body {
17 |         font-size: 16px;
18 |         margin: 0;
19 |         background: var(--azc-bg);
20 |       }
21 |       nav {
22 |         background: #333;
23 |         color: #fff;
24 |         padding: 16px;
25 |         font-family:
26 |           'Segoe UI',
27 |           -apple-system,
28 |           BlinkMacSystemFont,
29 |           Roboto,
30 |           'Helvetica Neue',
31 |           sans-serif;
32 |         font-size: 1.25rem;
33 |       }
34 |       azc-chat {
35 |         display: block;
36 |         max-width: 1024px;
37 |         margin: 0 auto;
38 |       }
39 |     </style>
40 |   </head>
41 |   <body>
42 |     <nav>ChatGPT with Enterprise Data</nav>
43 |     <azc-chat></azc-chat>
44 |     <script type="module" src="/src/index.ts"></script>
45 |   </body>
46 | </html>
47 | 


--------------------------------------------------------------------------------
/src/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "version": "1.0.0",
 4 |   "description": "Frontend for the ChatGPT RAG workshop",
 5 |   "private": true,
 6 |   "type": "module",
 7 |   "scripts": {
 8 |     "dev": "vite --port 8000 --host",
 9 |     "build": "vite build",
10 |     "watch": "vite build --watch --minify false",
11 |     "lint": "lit-analyzer",
12 |     "clean": "npx rimraf dist"
13 |   },
14 |   "author": "Microsoft",
15 |   "license": "MIT",
16 |   "dependencies": {
17 |     "lit": "^3.0.0"
18 |   },
19 |   "devDependencies": {
20 |     "lit-analyzer": "^2.0.1",
21 |     "typescript": "^5.2.2",
22 |     "vite": "^4.5.0"
23 |   },
24 |   "files": [
25 |     "dist"
26 |   ]
27 | }
28 | 


--------------------------------------------------------------------------------
/src/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/azure-openai-rag-workshop-template/5fb3bee73fbc067489f98d0a867bea504ad99cb3/src/frontend/public/favicon.ico


--------------------------------------------------------------------------------
/src/frontend/src/api.ts:
--------------------------------------------------------------------------------
 1 | import { type ChatResponse, type ChatRequestOptions, type ChatResponseChunk } from './models.js';
 2 | 
 3 | export const apiBaseUrl = import.meta.env.VITE_BACKEND_API_URI || '';
 4 | 
 5 | export async function getCompletion(options: ChatRequestOptions, oneShot = false) { // NOTE(review): `oneShot` is not used in the visible body
 6 |   const apiUrl = options.apiUrl || apiBaseUrl; // A URL set in the options wins over the build-time default
 7 | 
 8 |   // TODO: complete call to Chat API here (`response`, used below, is not defined until then)
 9 |   // const response =
10 | 
11 |   if (options.stream) {
12 |     return getChunksFromResponse<ChatResponseChunk>(response as Response, options.chunkIntervalMs); // Streaming: returns an async generator of chunks
13 |   }
14 | 
15 |   const json: ChatResponse = await response.json(); // NOTE(review): parsed before the status check below, so a non-JSON error body would throw here instead
16 |   if (response.status > 299 || !response.ok) {
17 |     throw new Error(json.error || 'Unknown error'); // presumably error responses carry an `error` field — confirm against the backend
18 |   }
19 | 
20 |   return json;
21 | }
22 | 
23 | export function getCitationUrl(citation: string): string {
24 |   return [apiBaseUrl, 'content', citation].join('/'); // <apiBaseUrl>/content/<citation>
25 | }
26 | 
27 | export class NdJsonParserStream extends TransformStream<string, JSON> { // Turns text chunks of newline-delimited JSON into parsed values
28 |   private buffer: string = ''; // Text received so far that did not parse as JSON yet
29 |   constructor() {
30 |     let controller: TransformStreamDefaultController<JSON>; // Assigned in `start`, used in `transform`
31 |     super({
32 |       start: (_controller) => {
33 |         controller = _controller;
34 |       },
35 |       transform: (chunk) => {
36 |         const jsonChunks = chunk.split('\n').filter(Boolean); // Non-empty lines of this chunk
37 |         for (const jsonChunk of jsonChunks) {
38 |           try {
39 |             this.buffer += jsonChunk;
40 |             controller.enqueue(JSON.parse(this.buffer)); // JSON.parse throws while the buffer is not valid JSON
41 |             this.buffer = ''; // Only reached after a successful parse
42 |           } catch {
43 |             // Invalid JSON: keep the buffer so that the next piece gets appended to it
44 |           }
45 |         }
46 |       },
47 |     });
48 |   }
49 | }
50 | 
51 | export async function* getChunksFromResponse<T>(response: Response, intervalMs: number): AsyncGenerator<T, void> {
52 |   // Decode the body as text, then parse it as newline-delimited JSON
53 |   const jsonStream = response.body?.pipeThrough(new TextDecoderStream()).pipeThrough(new NdJsonParserStream());
54 |   const reader = jsonStream?.getReader();
55 |   if (!reader) {
56 |     throw new Error('No response body or body is not readable');
57 |   }
58 | 
59 |   // Resolves with the given chunk once `intervalMs` has elapsed
60 |   const delayed = (chunk: T) => new Promise<T>((resolve) => setTimeout(() => resolve(chunk), intervalMs));
61 |   for (;;) {
62 |     const { value, done } = await reader.read();
63 |     if (done) return;
64 |     yield delayed(value as T);
65 |   }
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/src/frontend/src/components/chat.ts:
--------------------------------------------------------------------------------
  1 | /* eslint-disable unicorn/template-indent */
  2 | import { LitElement, css, html, nothing } from 'lit';
  3 | import { map } from 'lit/directives/map.js';
  4 | import { repeat } from 'lit/directives/repeat.js';
  5 | import { unsafeSVG } from 'lit/directives/unsafe-svg.js';
  6 | import { customElement, property, state, query } from 'lit/decorators.js';
  7 | import {
  8 |   type ChatRequestOptions,
  9 |   type ChatResponse,
 10 |   type ChatMessage,
 11 |   type ChatResponseChunk,
 12 |   type ChatDebugDetails,
 13 |   type ChatMessageContext,
 14 | } from '../models.js';
 15 | import { getCitationUrl, getCompletion } from '../api.js';
 16 | import { type ParsedMessage, parseMessageIntoHtml } from '../message-parser.js';
 17 | import sendSvg from '../../assets/send.svg?raw';
 18 | import questionSvg from '../../assets/question.svg?raw';
 19 | import lightbulbSvg from '../../assets/lightbulb.svg?raw';
 20 | import newChatSvg from '../../assets/new-chat.svg?raw';
 21 | import './debug.js';
 22 | 
 23 | export type ChatComponentState = { // Same three flags as the component's internal `@state()` properties
 24 |   hasError: boolean;
 25 |   isLoading: boolean;
 26 |   isStreaming: boolean;
 27 | };
 28 | 
 29 | export type ChatComponentOptions = ChatRequestOptions & { // Request options plus the component's own UI settings
 30 |   oneShot: boolean;
 31 |   enablePromptSuggestions: boolean;
 32 |   enableContentLinks: boolean; // When true, clicking a citation opens its URL in a new tab
 33 |   promptSuggestions: string[]; // Example questions offered to the user
 34 |   apiUrl?: string; // When set, used for requests instead of the build-time base URL
 35 |   strings: { // User-facing labels
 36 |     promptSuggestionsTitle: string;
 37 |     citationsTitle: string;
 38 |     followUpQuestionsTitle: string;
 39 |     showThoughtProcessTitle: string;
 40 |     closeTitle: string;
 41 |     thoughtsTitle: string;
 42 |     supportingContentTitle: string;
 43 |     chatInputPlaceholder: string;
 44 |     chatInputButtonLabel: string;
 45 |     assistant: string;
 46 |     user: string;
 47 |     errorMessage: string;
 48 |     newChatButton: string;
 49 |     retryButton: string;
 50 |   };
 51 | };
 52 | 
 53 | export const defaultOptions: ChatComponentOptions = { // Defaults; options passed as an attribute are merged over these
 54 |   approach: 'rrr' as const,
 55 |   suggestFollowupQuestions: true,
 56 |   enableContentLinks: false, // Citations do not open a URL by default
 57 |   oneShot: false,
 58 |   stream: false, // When true, the response is read as a stream of chunks
 59 |   chunkIntervalMs: 30, // Delay in milliseconds applied before each streamed chunk is delivered
 60 |   apiUrl: '', // Empty: requests fall back to the build-time base URL
 61 |   enablePromptSuggestions: true,
 62 |   promptSuggestions: [
 63 |     'How to search and book rentals?',
 64 |     'What is the refund policy?',
 65 |     'How to contact a representative?',
 66 |   ],
 67 |   messages: [],
 68 |   strings: {
 69 |     promptSuggestionsTitle: 'Ask anything or try an example',
 70 |     citationsTitle: 'Citations:',
 71 |     followUpQuestionsTitle: 'Follow-up questions:',
 72 |     showThoughtProcessTitle: 'Show thought process',
 73 |     closeTitle: 'Close',
 74 |     thoughtsTitle: 'Thought process',
 75 |     supportingContentTitle: 'Supporting Content',
 76 |     chatInputPlaceholder: 'Ask me anything...',
 77 |     chatInputButtonLabel: 'Send question',
 78 |     assistant: 'Support Assistant',
 79 |     user: 'You',
 80 |     errorMessage: 'We are currently experiencing an issue.',
 81 |     newChatButton: 'New chat',
 82 |     retryButton: 'Retry',
 83 |   },
 84 | };
 85 | 
 86 | /**
 87 |  * A chat component that allows the user to ask questions and get answers from an API.
 88 |  * The component also displays default prompts that the user can click on to ask a question.
 89 |  * The component is built as a custom element that extends LitElement.
 90 |  *
 91 |  * Labels and other aspects are configurable via the `options` property.
 92 |  * @element azc-chat
 93 |  * @fires messagesUpdated - Fired when the message thread is updated
 94 |  * @fires stateChanged - Fired when the state of the component changes
 95 |  * */
 96 | @customElement('azc-chat')
 97 | export class ChatComponent extends LitElement {
 98 |   @property({
 99 |     type: Object,
100 |     // The attribute holds JSON that is layered over defaultOptions. `strings` is merged key by key,
101 |     // so overriding a single label does not discard the remaining default labels.
102 |     converter(value) {
103 |       const parsed = JSON.parse(value || '{}');
104 |       return { ...defaultOptions, ...parsed, strings: { ...defaultOptions.strings, ...parsed?.strings } };
105 |     },
106 |   })
107 |   options: ChatComponentOptions = defaultOptions;
103 | 
104 |   @property() question = ''; // current text of the chat input; cleared when a request is sent
105 |   @property({ type: Array }) messages: ChatMessage[] = []; // conversation thread; update requests for it dispatch `messagesUpdated`
106 |   @state() protected hasError = false; // the last request failed; the error bubble is rendered
107 |   @state() protected isLoading = false; // a request is pending; input and send button are disabled
108 |   @state() protected isStreaming = false; // answer chunks are being received; the loader is hidden
109 |   @state() protected debugDetails?: ChatDebugDetails; // when set, the debug overlay is rendered
110 |   @query('.messages') protected messagesElement; // message list element, used for scroll calculations
111 |   @query('.chat-input') protected chatInputElement; // sticky input bar, used for scroll calculations
112 | 
113 |   onSuggestionClicked(suggestion: string) { // Sends a suggested prompt or follow-up question as if the user had typed it
114 |     this.question = suggestion;
115 |     this.onSendClicked();
116 |   }
117 | 
118 |   onCitationClicked(citation: string) { // Opens the cited document in a new tab when content links are enabled
119 |     if (this.options.enableContentLinks) {
120 |       const path = getCitationUrl(citation);
121 |       window.open(path, '_blank', 'noopener'); // the opened page gets no window.opener handle back to this app
122 |     } else {
123 |       // TODO: open debug details
124 |     }
125 |   }
126 | 
127 |   onKeyPressed(event: KeyboardEvent) {
128 |     // Only Enter is intercepted: it sends the question instead of inserting a line break
129 |     if (event.key !== 'Enter') return;
130 |     event.preventDefault();
131 |     this.onSendClicked();
132 |   }
133 | 
134 |   onShowDebugClicked(context: ChatMessageContext = {}) {
135 |     // Missing fields fall back to empty values so the debug panel always receives a complete object
136 |     const thoughts = context.thoughts ?? '';
137 |     const dataPoints = context.data_points ?? [];
138 |     this.debugDetails = { thoughts, dataPoints };
139 |   }
140 | 
141 |   async onSendClicked(isRetry = false) { // Sends the question (or, on retry, the unchanged thread) and appends the answer
142 |     // Skip re-entrant calls and empty questions: Enter in an empty input would otherwise post a blank message
143 |     if (this.isLoading || (!isRetry && !this.question)) return;
144 |     this.hasError = false;
145 |     if (!isRetry) {
146 |       this.messages = [
147 |         ...this.messages,
148 |         {
149 |           content: this.question,
150 |           role: 'user',
151 |         },
152 |       ];
153 |     }
154 |     this.question = '';
155 |     this.isLoading = true;
156 |     this.scrollToLastMessage();
157 |     try {
158 |       const response = await getCompletion({ ...this.options, messages: this.messages }, this.options.oneShot);
159 |       if (this.options.stream && !this.options.oneShot) {
160 |         this.isStreaming = true;
161 |         const chunks = response as AsyncGenerator<ChatResponseChunk>;
162 |         const messages = this.messages; // thread before the answer; the growing answer is re-appended on every content chunk
163 |         const message: ChatMessage = {
164 |           content: '',
165 |           role: 'assistant',
166 |           context: {
167 |             data_points: [],
168 |             thoughts: '',
169 |           },
170 |         };
171 |         for await (const chunk of chunks) {
172 |           if (chunk.choices[0].delta.context?.data_points) {
173 |             message.context!.data_points = chunk.choices[0].delta.context?.data_points;
174 |             message.context!.thoughts = chunk.choices[0].delta.context?.thoughts ?? '';
175 |           } else if (chunk.choices[0].delta.content) {
176 |             message.content += chunk.choices[0].delta.content;
177 |             this.messages = [...messages, message];
178 |             this.scrollToLastMessage();
179 |           }
180 |         }
181 |       } else {
182 |         const chatResponse = response as ChatResponse;
183 |         this.messages = [...this.messages, chatResponse.choices[0].message];
184 |         this.scrollToLastMessage();
185 |       }
186 |     } catch (error) {
187 |       // The error flag makes render() show the error bubble with its retry button
188 |       this.hasError = true;
189 |       console.error(error);
190 |     } finally {
191 |       // Reset on success and failure alike so the input is always unlocked again
192 |       this.isLoading = false;
193 |       this.isStreaming = false;
194 |     }
195 |   }
196 | 
197 |   /**
198 |    * Notifies outside listeners about update requests before delegating to the base class:
199 |    * requests for `messages` dispatch `messagesUpdated`, the three status flags dispatch `stateChanged`.
200 |    */
201 |   override requestUpdate(name?: string, oldValue?: any) {
202 |     const isStatusFlag = name === 'hasError' || name === 'isLoading' || name === 'isStreaming';
203 |     if (name === 'messages') {
204 |       const detail = { messages: this.messages };
205 |       this.dispatchEvent(new CustomEvent('messagesUpdated', { detail, bubbles: true }));
206 |     } else if (isStatusFlag) {
207 |       const state = {
208 |         hasError: this.hasError,
209 |         isLoading: this.isLoading,
210 |         isStreaming: this.isStreaming,
211 |       };
212 |       const detail = { state };
213 |       this.dispatchEvent(new CustomEvent('stateChanged', { detail, bubbles: true }));
214 |     }
215 | 
216 |     // Hand the request over to the LitElement implementation
217 |     return super.requestUpdate(name, oldValue);
218 |   }
219 | 
220 |   protected scrollToLastMessage() { // Scrolls the page so the end of the message list is not hidden behind the input bar
221 |     // Need to be delayed to run after the DOM refresh
222 |     setTimeout(() => {
223 |       // Both queries are empty until the first render has happened (e.g. a programmatic send on a fresh element)
224 |       if (!this.messagesElement || !this.chatInputElement) return;
225 |       const { bottom } = this.messagesElement.getBoundingClientRect();
226 |       const { top } = this.chatInputElement.getBoundingClientRect();
227 |       if (bottom > top) window.scrollBy(0, bottom - top);
228 |     }, 0);
229 |   }
230 | 
231 |   protected renderSuggestions = (suggestions: string[]) => { // Titled row of suggestion buttons; clicking one sends it as the question
232 |     return html`
233 |       <section class="suggestions-container">
234 |         <h2>${this.options.strings.promptSuggestionsTitle}</h2>
235 |         <div class="suggestions">
236 |           ${map(
237 |             suggestions,
238 |             (suggestion) => html`
239 |               <button class="suggestion" @click=${() => this.onSuggestionClicked(suggestion)}>${suggestion}</button>
240 |             `,
241 |           )}
242 |         </div>
243 |       </section>
244 |     `;
245 |   };
246 | 
247 |   protected renderLoader = () => { // Placeholder bubble shown while loading but not yet streaming; replaceable through the `loader` slot
248 |     return this.isLoading && !this.isStreaming
249 |       ? html`
250 |           <div class="message assistant loader">
251 |             <div class="message-body">
252 |               <slot name="loader"><div class="loader-animation"></div></slot>
253 |               <div class="message-role">${this.options.strings.assistant}</div>
254 |             </div>
255 |           </div>
256 |         `
257 |       : nothing;
258 |   };
259 | 
260 |   protected renderMessage = (message: ParsedMessage) => { // One message bubble; assistant messages get the thought-process button, citations are listed when present
261 |     return html`
262 |       <div class="message ${message.role} animation">
263 |         ${message.role === 'assistant'
264 |           ? html`<slot name="message-header">
265 |               <div class="debug-buttons">
266 |                 <button
267 |                   class="button"
268 |                   @click=${() => this.onShowDebugClicked(message.context)}
269 |                   title=${this.options.strings.showThoughtProcessTitle}
270 |                 >
271 |                   ${unsafeSVG(lightbulbSvg)}
272 |                 </button>
273 |               </div>
274 |             </slot>`
275 |           : nothing}
276 |         <div class="message-body">
277 |           <div class="content">${message.html}</div>
278 |           ${message.citations.length > 0
279 |             ? html`
280 |                 <div class="citations">
281 |                   <div class="citations-title">${this.options.strings.citationsTitle}</div>
282 |                   ${map(message.citations, this.renderCitation)}
283 |                 </div>
284 |               `
285 |             : nothing}
286 |         </div>
287 |         <div class="message-role">
288 |           ${message.role === 'user' ? this.options.strings.user : this.options.strings.assistant}
289 |         </div>
290 |       </div>
291 |     `;
292 |   };
293 | 
294 |   protected renderError = () => { // Error bubble whose button retries the request without adding a new user message
295 |     return html`
296 |       <div class="message assistant error">
297 |         <div class="message-body">
298 |           <span class="error-message">${this.options.strings.errorMessage}</span>
299 |           <button @click=${() => this.onSendClicked(true)}>${this.options.strings.retryButton}</button>
300 |         </div>
301 |       </div>
302 |     `;
303 |   };
304 | 
305 |   protected renderCitation = (citation: string, index: number) => { // Numbered citation button listed under a message; `index` is zero-based here
306 |     return html`<button class="citation" @click=${() => this.onCitationClicked(citation)}>
307 |       ${index + 1}. ${citation}
308 |     </button>`;
309 |   };
310 | 
311 |   protected renderCitationLink = (citation: string, index: number) => { // Inline superscript reference; `index` is displayed as given (render() hands this to parseMessageIntoHtml)
312 |     return html`<button class="citation-link" @click=${() => this.onCitationClicked(citation)}>
313 |       <sup>[${index}]</sup>
314 |     </button>`;
315 |   };
316 | 
317 |   protected renderFollowupQuestions = (questions: string[]) => { // Follow-up question buttons, sent like suggestions when clicked; nothing is rendered for an empty list
318 |     return questions.length > 0
319 |       ? html`
320 |           <div class="questions">
321 |             <span class="question-icon" title=${this.options.strings.followUpQuestionsTitle}>
322 |               ${unsafeSVG(questionSvg)} </span
323 |             >${map(
324 |               questions,
325 |               (question) => html`
326 |                 <button class="question animation" @click=${() => this.onSuggestionClicked(question)}>
327 |                   ${question}
328 |                 </button>
329 |               `,
330 |             )}
331 |           </div>
332 |         `
333 |       : nothing;
334 |   };
335 | 
336 |   protected renderChatInput = () => { // New-chat button plus question form. A new chat also clears the error state; native form submission is cancelled because the send button is an implicit submit button
337 |     return html`
338 |       <div class="chat-input">
339 |         <button
340 |           class="button new-chat-button"
341 |           @click=${() => ((this.messages = []), (this.hasError = false))}
342 |           title=${this.options.strings.newChatButton}
343 |           .disabled=${this.messages?.length === 0 || this.isLoading || this.isStreaming}
344 |         >
345 |           ${unsafeSVG(newChatSvg)}
346 |         </button>
347 |         <form class="input-form" @submit=${(event) => event.preventDefault()}>
348 |           <textarea
349 |             class="text-input"
350 |             placeholder="${this.options.strings.chatInputPlaceholder}"
351 |             .value=${this.question}
352 |             autocomplete="off"
353 |             @input=${(event) => (this.question = event.target.value)}
354 |             @keypress=${this.onKeyPressed}
355 |             .disabled=${this.isLoading}
356 |           ></textarea>
357 |           <button
358 |             class="submit-button"
359 |             @click=${() => this.onSendClicked()}
360 |             title="${this.options.strings.chatInputButtonLabel}"
361 |             .disabled=${this.isLoading || !this.question}
362 |           >
363 |             ${unsafeSVG(sendSvg)}
364 |           </button>
365 |         </form>
366 |       </div>
367 |     `;
368 |   };
369 | 
370 |   protected override render() { // Re-parses every message on each render, then lays out suggestions (empty thread only), messages, loader, error, follow-ups of the last message, the input and the optional debug overlay
371 |     const parsedMessages = this.messages.map((message) => parseMessageIntoHtml(message, this.renderCitationLink));
372 |     return html`
373 |       <section class="chat-container">
374 |         ${this.options.enablePromptSuggestions &&
375 |         this.options.promptSuggestions.length > 0 &&
376 |         this.messages.length === 0
377 |           ? this.renderSuggestions(this.options.promptSuggestions)
378 |           : nothing}
379 |         <div class="messages">
380 |           ${repeat(parsedMessages, (_, index) => index, this.renderMessage)} ${this.renderLoader()}
381 |           ${this.hasError ? this.renderError() : nothing}
382 |           ${this.renderFollowupQuestions(parsedMessages.at(-1)?.followupQuestions ?? [])}
383 |         </div>
384 |         ${this.renderChatInput()}
385 |       </section>
386 |       ${this.debugDetails
387 |         ? html`<section class="debug-details">
388 |             <azc-debug .details=${this.debugDetails} .options=${this.options}>
389 |               <button
390 |                 slot="close-button"
391 |                 class="button close-button"
392 |                 @click=${() => (this.debugDetails = undefined)}
393 |                 title=${this.options.strings.closeTitle}
394 |               >
395 |                 X
396 |               </button>
397 |             </azc-debug>
398 |           </section>`
399 |         : nothing}
400 |     `;
401 |   }
402 | 
403 |   static override styles = css`
404 |     :host {
405 |       /* Base properties: each --azc-* hook can be set by the embedding page, the second value is the default */
406 |       --primary: var(--azc-primary, #07f);
407 |       --error: var(--azc-error, #e30);
408 |       --text-color: var(--azc-text-color, #000);
409 |       --text-invert-color: var(--azc-text-invert-color, var(--azc--text-invert-color, #fff)); /* misspelled double-dash hook kept as fallback */
410 |       --disabled-color: var(--azc-disabled-color, #ccc);
411 |       --bg: var(--azc-bg, #eee);
412 |       --card-bg: var(--azc-card-bg, #fff);
413 |       --card-shadow: var(--azc-card-shadow, 0 0.3px 0.9px rgba(0 0 0 / 12%), 0 1.6px 3.6px rgba(0 0 0 / 16%));
414 |       --space-md: var(--azc-space-md, 12px);
415 |       --space-xl: var(--azc-space-xl, calc(var(--space-md) * 2));
416 |       --space-xs: var(--azc-space-xs, calc(var(--space-md) / 2));
417 |       --space-xxs: var(--azc-space-xxs, calc(var(--space-md) / 4)); /* previously read the xs hook */
418 |       --border-radius: var(--azc-border-radius, 16px);
419 |       --focus-outline: var(--azc-focus-outline, 2px solid);
420 |       --overlay-color: var(--azc-overlay-color, rgba(0 0 0 / 40%));
421 | 
422 |       /* Component-specific properties; hover hooks fall back to the hook they used to read, so existing themes render the same */
423 |       --error-color: var(--azc-error-color, var(--error));
424 |       --error-border: var(--azc-error-border, none);
425 |       --error-bg: var(--azc-error-bg, var(--card-bg));
426 |       --retry-button-color: var(--azc-retry-button-color, var(--text-color));
427 |       --retry-button-bg: var(--azc-retry-button-bg, #f0f0f0);
428 |       --retry-button-bg-hover: var(--azc-retry-button-bg-hover, var(--azc-retry-button-bg, #e5e5e5));
429 |       --retry-button-border: var(--azc-retry-button-border, none);
430 |       --suggestion-color: var(--azc-suggestion-color, var(--text-color));
431 |       --suggestion-border: var(--azc-suggestion-border, none);
432 |       --suggestion-bg: var(--azc-suggestion-bg, var(--card-bg));
433 |       --suggestion-shadow: var(--azc-suggestion-shadow, 0 6px 16px -1.5px rgba(141 141 141 / 30%));
434 |       --user-message-color: var(--azc-user-message-color, var(--text-invert-color));
435 |       --user-message-border: var(--azc-user-message-border, none);
436 |       --user-message-bg: var(--azc-user-message-bg, var(--primary));
437 |       --bot-message-color: var(--azc-bot-message-color, var(--text-color));
438 |       --bot-message-border: var(--azc-bot-message-border, none);
439 |       --citation-color: var(--azc-citation-color, var(--text-invert-color));
440 |       --bot-message-bg: var(--azc-bot-message-bg, var(--card-bg));
441 |       --citation-bg: var(--azc-citation-bg, var(--primary));
442 |       --citation-bg-hover: var(--azc-citation-bg-hover, var(--azc-citation-bg, color-mix(in srgb, var(--primary), #000 10%)));
443 |       --new-chat-button-color: var(--azc-button-color, var(--text-invert-color));
444 |       --new-chat-button-bg: var(--azc-new-chat-button-bg, var(--primary));
445 |       --new-chat-button-bg-hover: var(--azc-new-chat-button-bg-hover, var(--azc-new-chat-button-bg, color-mix(in srgb, var(--primary), #000 10%)));
446 |       --chat-input-color: var(--azc-chat-input-color, var(--text-color));
447 |       --chat-input-border: var(--azc-chat-input-border, none);
448 |       --chat-input-bg: var(--azc-chat-input-bg, var(--card-bg));
449 |       --submit-button-color: var(--azc-button-color, var(--primary));
450 |       --submit-button-border: var(--azc-submit-button-border, none);
451 |       --submit-button-bg: var(--azc-submit-button-bg, none);
452 |       --submit-button-bg-hover: var(--azc-submit-button-bg-hover, var(--azc-submit-button-color, #f0f0f0)); /* used to expose only a colour-named hook for this background */
453 |     }
454 |     *:focus-visible {
455 |       outline: var(--focus-outline) var(--primary);
456 |     }
457 |     .animation {
458 |       animation: 0.3s ease;
459 |     }
460 |     svg {
461 |       fill: currentColor;
462 |     }
463 |     button {
464 |       font-size: 1rem;
465 |       border-radius: calc(var(--border-radius) / 2);
466 |       outline: var(--focus-outline) transparent;
467 |       transition: outline 0.3s ease;
468 | 
469 |       &:not(:disabled) {
470 |         cursor: pointer;
471 |       }
472 |     }
473 |     .chat-container {
474 |       container-type: inline-size;
475 |       position: relative;
476 |       background: var(--bg);
477 |       font-family:
478 |         'Segoe UI',
479 |         -apple-system,
480 |         BlinkMacSystemFont,
481 |         Roboto,
482 |         'Helvetica Neue',
483 |         sans-serif;
484 |     }
485 |     .citation-link {
486 |       padding: 0;
487 |       color: var(--primary);
488 |       background: none;
489 |       border: none;
490 |       white-space: normal;
491 |     }
492 |     .citation {
493 |       font-size: 0.85rem;
494 |       color: var(--citation-color);
495 |       background: var(--citation-bg);
496 |       border: var(--citation-border);
497 |       padding: var(--space-xxs) var(--space-xs);
498 |       margin-right: var(--space-xs);
499 |       margin-top: var(--space-xs);
500 | 
501 |       &:hover {
502 |         background: var(--citation-bg-hover);
503 |       }
504 |     }
505 |     .citations-title {
506 |       font-weight: bold;
507 |     }
508 |     .suggestions-container {
509 |       text-align: center;
510 |       padding: var(--space-xl);
511 |     }
512 |     .suggestions {
513 |       display: flex;
514 |       gap: var(--space-md);
515 |     }
516 |     @container (width < 480px) {
517 |       .suggestions {
518 |         flex-direction: column;
519 |       }
520 |     }
521 | 
522 |     .suggestion {
523 |       flex: 1 1 0;
524 |       padding: var(--space-xl) var(--space-md);
525 |       color: var(--suggestion-color); /* was misspelled --sugestion-color, so the :host value never applied */
526 |       background: var(--suggestion-bg);
527 |       border: var(--suggestion-border);
528 |       border-radius: var(--border-radius);
529 |       box-shadow: var(--suggestion-shadow);
530 | 
531 |       &:hover {
532 |         outline: var(--focus-outline) var(--primary);
533 |       }
534 |     }
535 |     .messages {
536 |       padding: var(--space-xl);
537 |       display: flex;
538 |       flex-direction: column;
539 |       gap: var(--space-md);
540 |     }
541 |     .user {
542 |       align-self: end;
543 |       color: var(--user-message-color);
544 |       background: var(--user-message-bg);
545 |       border: var(--user-message-border);
546 |     }
547 |     .assistant {
548 |       color: var(--bot-message-color);
549 |       background: var(--bot-message-bg);
550 |       border: var(--bot-message-border);
551 |       box-shadow: var(--card-shadow);
552 |     }
553 |     .message {
554 |       position: relative;
555 |       width: auto;
556 |       max-width: 70%;
557 |       border-radius: var(--border-radius);
558 |       padding: var(--space-xl);
559 |       margin-bottom: var(--space-xl);
560 |       &.user {
561 |         animation-name: fade-in-up;
562 |       }
563 |     }
564 |     .message-body {
565 |       display: flex;
566 |       flex-direction: column;
567 |       gap: var(--space-md);
568 |     }
569 |     .content {
570 |       white-space: pre-line;
571 |     }
572 |     .message-role {
573 |       position: absolute;
574 |       right: var(--space-xl);
575 |       bottom: -1.25em;
576 |       color: var(--text-color);
577 |       font-size: 0.85rem;
578 |       opacity: 0.6;
579 |     }
580 |     .questions {
581 |       margin: var(--space-md) 0;
582 |       color: var(--primary);
583 |       text-align: right;
584 |     }
585 |     .question-icon {
586 |       vertical-align: middle;
587 |       display: inline-block;
588 |       height: 1.7rem;
589 |       width: 1.7rem;
590 |       margin-bottom: var(--space-xs);
591 |       margin-left: var(--space-xs);
592 |     }
593 |     .question {
594 |       position: relative;
595 |       padding: var(--space-xs) var(--space-md);
596 |       margin-bottom: var(--space-xs);
597 |       margin-left: var(--space-xs);
598 |       vertical-align: middle;
599 |       color: var(--primary);
600 |       background: var(--card-bg);
601 |       border: 1px solid var(--primary);
602 |       animation-name: fade-in-right;
603 |       &:hover {
604 |         background: color-mix(in srgb, var(--card-bg), var(--primary) 5%);
605 |       }
606 |     }
607 |     .debug-buttons {
608 |       display: flex;
609 |       justify-content: right;
610 |       gap: var(--space-md);
611 |       margin-bottom: var(--space-md);
612 |     }
613 |     .debug-details {
614 |       position: fixed;
615 |       inset: 0;
616 |       background: var(--overlay-color);
617 |     }
618 |     .button,
619 |     .submit-button {
620 |       display: flex;
621 |       align-items: center;
622 |       justify-content: center;
623 |       padding: var(--space-xs);
624 |       border: var(--button-border);
625 |       background: var(--submit-button-bg);
626 |       color: var(--submit-button-color);
627 |       &:disabled {
628 |         color: var(--disabled-color);
629 |       }
630 |       &:hover:not(:disabled) {
631 |         background: var(--submit-button-bg-hover);
632 |       }
633 |     }
634 |     .submit-button {
635 |       padding: 0;
636 |       width: 48px;
637 |     }
638 |     .close-button {
639 |       position: absolute;
640 |       top: var(--space-md);
641 |       right: var(--space-md);
642 |       width: auto;
643 |       padding: var(--space-md);
644 |       &:hover:not(:disabled) {
645 |         background: var(--card-bg);
646 |       }
647 |     }
648 |     .error {
649 |       color: var(--error-color);
650 |       background: var(--error-bg);
651 |       outline: var(--focus-outline) var(--error);
652 | 
653 |       & .message-body {
654 |         flex-direction: row;
655 |         align-items: center;
656 |       }
657 | 
658 |       & button {
659 |         flex: 0;
660 |         padding: var(--space-md);
661 |         color: var(--retry-button-color);
662 |         background: var(--retry-button-bg);
663 |         border: var(--retry-button-border);
664 | 
665 |         &:hover {
666 |           background: var(--retry-button-bg-hover);
667 |         }
668 |       }
669 |     }
670 |     .error-message {
671 |       flex: 1;
672 |     }
673 |     .chat-input {
674 |       --half-space-xl: calc(var(--space-xl) / 2);
675 |       position: sticky;
676 |       bottom: 0;
677 |       padding: var(--space-xl);
678 |       padding-top: var(--half-space-xl);
679 |       background: var(--bg);
680 |       box-shadow: 0 calc(-1 * var(--half-space-xl)) var(--half-space-xl) var(--bg);
681 |       display: flex;
682 |       gap: var(--space-md);
683 |     }
684 |     .new-chat-button {
685 |       width: 48px;
686 |       height: 48px;
687 |       padding: var(--space-md);
688 |       border-radius: 50%;
689 |       background: var(--new-chat-button-bg);
690 |       color: var(--new-chat-button-color);
691 |       font-size: 1.5rem;
692 |       &:hover:not(:disabled) {
693 |         background: var(--new-chat-button-bg-hover);
694 |         color: var(--new-chat-button-color);
695 |       }
696 |     }
697 |     .input-form {
698 |       display: flex;
699 |       flex: 1 auto;
700 |       background: var(--chat-input-bg);
701 |       border: var(--chat-input-border);
702 |       border-radius: var(--border-radius);
703 |       padding: var(--space-md);
704 |       box-shadow: var(--card-shadow);
705 |       outline: var(--focus-outline) transparent;
706 |       transition: outline 0.3s ease;
707 | 
708 |       &:has(.text-input:focus-visible) {
709 |         outline: var(--focus-outline) var(--primary);
710 |       }
711 |     }
712 |     .text-input {
713 |       padding: var(--space-xs);
714 |       font-family: inherit;
715 |       font-size: 1rem;
716 |       flex: 1 auto;
717 |       height: 3rem;
718 |       border: none;
719 |       resize: none;
720 |       background: none;
721 |       &::placeholder {
722 |         color: var(--text-color);
723 |         opacity: 0.4;
724 |       }
725 |       &:focus {
726 |         outline: none;
727 |       }
728 |       &:disabled {
729 |         opacity: 0.7;
730 |       }
731 |     }
732 |     .loader-animation {
733 |       width: 100px;
734 |       height: 4px;
735 |       border-radius: var(--border-radius);
736 |       overflow: hidden;
737 |       background-color: var(--primary);
738 |       transform: scaleX(0);
739 |       transform-origin: center left;
740 |       animation: cubic-bezier(0.85, 0, 0.15, 1) 2s infinite load-animation;
741 |     }
742 | 
743 |     @keyframes load-animation {
744 |       0% {
745 |         transform: scaleX(0);
746 |         transform-origin: center left;
747 |       }
748 |       50% {
749 |         transform: scaleX(1);
750 |         transform-origin: center left;
751 |       }
752 |       51% {
753 |         transform: scaleX(1);
754 |         transform-origin: center right;
755 |       }
756 |       100% {
757 |         transform: scaleX(0);
758 |         transform-origin: center right;
759 |       }
760 |     }
761 |     @keyframes fade-in-up {
762 |       0% {
763 |         opacity: 0.5;
764 |         top: 100px;
765 |       }
766 |       100% {
767 |         opacity: 1;
768 |         top: 0px;
769 |       }
770 |     }
771 |     @keyframes fade-in-right {
772 |       0% {
773 |         opacity: 0.5;
774 |         right: -100px;
775 |       }
776 |       100% {
777 |         opacity: 1;
778 |         right: 0;
779 |       }
780 |     }
781 |     @media (prefers-reduced-motion: reduce) {
782 |       .animation {
783 |         animation: none;
784 |       }
785 |     }
786 |   `;
787 | }
788 | 
789 | declare global {
790 |   interface HTMLElementTagNameMap {
791 |     'azc-chat': ChatComponent;
792 |   }
793 | }
794 | 


--------------------------------------------------------------------------------
/src/frontend/src/components/debug.ts:
--------------------------------------------------------------------------------
  1 | /* eslint-disable unicorn/template-indent */
  2 | import { LitElement, css, html } from 'lit';
  3 | import { map } from 'lit/directives/map.js';
  4 | import { customElement, property } from 'lit/decorators.js';
  5 | import { unsafeHTML } from 'lit/directives/unsafe-html.js';
  6 | import { type ChatDebugDetails } from '../models.js';
  7 | 
  8 | export type DebugComponentOptions = { // Labels needed by the debug panel
  9 |   strings: {
 10 |     thoughtsTitle: string; // label of the thought-process tab
 11 |     supportingContentTitle: string; // label of the supporting-content tab
 12 |   };
 13 | };
 14 | 
 15 | @customElement('azc-debug')
 16 | export class DebugComponent extends LitElement { // Two-tab panel showing the thought process or the supporting data points of an answer
 17 |   @property({ type: Object }) details: ChatDebugDetails = { thoughts: '', dataPoints: [] };
 18 |   @property({ type: Object }) options!: DebugComponentOptions; // must be set before rendering: render() reads options.strings
 19 |   @property({ type: Boolean }) showThoughtProcess = true; // true: thought-process tab, false: supporting-content tab
 20 | 
 21 |   protected renderThoughtProcess = (thoughtProcess: string) => { // NOTE(review): inserted as raw HTML through unsafeHTML; the text must come from a trusted source
 22 |     return html`${unsafeHTML(thoughtProcess)}`;
 23 |   };
 24 | 
 25 |   protected renderDataPoints = (dataPoints: string[]) => { // One card per data point, split at its first ':' into title and extract
 26 |     const infos = dataPoints.map((dataPoint) => {
 27 |       const [title, ...extract] = dataPoint.split(':');
 28 |       return { title, extract: extract.join(':') }; // re-join so colons inside the extract are preserved
 29 |     });
 30 |     return html`<div class="data-points">
 31 |       ${map(
 32 |         infos,
 33 |         (info) =>
 34 |           html`<div class="card">
 35 |             <div class="title">${info.title}</div>
 36 |             <div>${info.extract}</div>
 37 |           </div>`,
 38 |       )}
 39 |     </div>`;
 40 |   };
 41 | 
 42 |   protected override render() { // Close-button slot, the two tab buttons, and the content of the active tab
 43 |     return html`<aside class="debug-container">
 44 |       <slot name="close-button"></slot>
 45 |       <nav class="nav">
 46 |         <button class=${this.showThoughtProcess ? 'active' : ''} @click=${() => (this.showThoughtProcess = true)}>
 47 |           ${this.options.strings.thoughtsTitle}
 48 |         </button>
 49 |         <button class=${this.showThoughtProcess ? '' : 'active'} @click=${() => (this.showThoughtProcess = false)}>
 50 |           ${this.options.strings.supportingContentTitle}
 51 |         </button>
 52 |       </nav>
 53 |       <section class="content">
 54 |         ${this.showThoughtProcess
 55 |           ? this.renderThoughtProcess(this.details.thoughts)
 56 |           : this.renderDataPoints(this.details.dataPoints)}
 57 |       </section>
 58 |     </aside>`;
 59 |   }
 60 | 
 61 |   static override styles = css`
 62 |     *:focus-visible {
 63 |       outline: var(--focus-outline) var(--primary);
 64 |     }
 65 |     button {
 66 |       padding: var(--space-md);
 67 |       font-size: 1rem;
 68 |       outline: var(--focus-outline) transparent;
 69 |       transition: outline 0.3s ease;
 70 |       border: none;
 71 | 
 72 |       &:not(:disabled) {
 73 |         cursor: pointer;
 74 |       }
 75 |       &:hover:not(:disabled) {
 76 |         /* TODO: separate out hover style */
 77 |         background: var(--submit-button-bg-hover);
 78 |       }
 79 |     }
 80 |     .active {
 81 |       border-bottom: 3px solid var(--primary);
 82 |     }
 83 |     .nav {
 84 |       padding-bottom: var(--space-md);
 85 |     }
 86 |     .debug-container {
 87 |       position: absolute;
 88 |       inset: var(--space-xl);
 89 |       display: flex;
 90 |       flex-direction: column;
 91 |       border-radius: var(--border-radius);
 92 |       background: var(--bg);
 93 |       overflow: hidden;
 94 |       padding: var(--space-xl);
 95 |       margin: 0px auto;
 96 |       max-width: 1024px;
 97 |     }
 98 |     .content {
 99 |       flex: 1;
100 |       display: flex;
101 |       flex-direction: column;
102 |       overflow: auto;
103 |     }
104 |     .title {
105 |       font-weight: bold;
106 |       margin-bottom: var(--space-md);
107 |     }
108 |     .card {
109 |       padding: var(--space-md);
110 |       margin-bottom: var(--space-md);
111 |       border-radius: var(--border-radius);
112 |       /* TODO: separate out card styles */
113 |       color: var(--bot-message-color);
114 |       background: var(--bot-message-bg);
115 |       border: var(--bot-message-border);
116 |       box-shadow: var(--card-shadow);
117 |     }
118 |   `;
119 | }
120 | 
121 | declare global {
122 |   interface HTMLElementTagNameMap {
123 |     'azc-debug': DebugComponent;
124 |   }
125 | }
126 | 


--------------------------------------------------------------------------------
/src/frontend/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from './api.js';
2 | export * from './components/chat.js';
3 | export * from './components/debug.js';
4 | export * from './message-parser.js';
5 | export * from './models.js';
6 | 


--------------------------------------------------------------------------------
/src/frontend/src/message-parser.ts:
--------------------------------------------------------------------------------
 1 | import { type HTMLTemplateResult, html, nothing } from 'lit';
 2 | import { type ChatMessage, type ChatMessageContext } from './models.js';
 3 | 
 4 | export type ParsedMessage = { // Result of parseMessageIntoHtml
 5 |   html: HTMLTemplateResult; // message text as a Lit template, with citation references rendered inline
 6 |   citations: string[]; // distinct citation names in order of first appearance
 7 |   followupQuestions: string[]; // texts extracted from `<<...>>` markers
 8 |   role: string;
 9 |   context?: ChatMessageContext; // copied from the source message
10 | };
11 | 
12 | /**
13 |  * Converts a chat message into a renderable template plus the citations and
14 |  * follow-up questions found in its text.
15 |  *
16 |  * User messages are passed through unparsed. For other roles, `<<question>>` markers
17 |  * are removed and collected, anything after an unclosed `<<` is dropped, and every
18 |  * `[citation]` is replaced by the output of `renderCitationReference`, called with
19 |  * the citation text and its 1-based position among the distinct citations.
20 |  * @returns the template together with the collected citations, follow-up questions, role and context
21 |  */
22 | export function parseMessageIntoHtml(
23 |   message: ChatMessage,
24 |   renderCitationReference: (citation: string, index: number) => HTMLTemplateResult,
25 | ): ParsedMessage {
26 |   const { content, role, context } = message;
27 |   const citations: string[] = [];
28 |   const followupQuestions: string[] = [];
29 | 
30 |   if (role === 'user') {
31 |     return { html: html`${content}`, citations, followupQuestions, role, context };
32 |   }
33 | 
34 |   // Pull out the complete follow-up questions first
35 |   const withoutQuestions = content.replaceAll(/<<([^>]+)>>/g, (_match, question) => {
36 |     followupQuestions.push(question);
37 |     return '';
38 |   });
39 |   // A remaining `<<` belongs to a question that is still incomplete: cut the text there
40 |   const [completedText] = withoutQuestions.split('<<');
41 |   const text = completedText.trim();
42 | 
43 |   // Splitting on the capture group alternates plain text (even) and citation names (odd)
44 |   const segments = text.split(/\[([^\]]+)]/g);
45 |   const fragments = segments.map((segment, position) => {
46 |     if (position % 2 === 0) return html`${segment}`;
47 |     // Handle only completed citations
48 |     if (position + 1 >= segments.length) return nothing;
49 | 
50 |     let reference = citations.indexOf(segment) + 1;
51 |     if (reference === 0) {
52 |       citations.push(segment);
53 |       reference = citations.length;
54 |     }
55 |     return renderCitationReference(segment, reference);
56 |   });
57 | 
58 |   return {
59 |     html: html`${fragments}`,
60 |     citations,
61 |     followupQuestions,
62 |     role,
63 |     context,
64 |   };
65 | }
66 | 


--------------------------------------------------------------------------------
/src/frontend/src/models.ts:
--------------------------------------------------------------------------------
/** A single chat message: its text and the role of its author. */
export type Message = {
  content: string;
  role: string;
};

/** Debug information attached to a chat answer. */
export type ChatDebugDetails = {
  thoughts: string;
  dataPoints: string[];
};

/**
 * Free-form context attached to a chat message.
 * Any key is allowed; `thoughts` and `data_points` are the two typed, optional ones.
 */
export type ChatMessageContext = Record<string, any> & {
  thoughts?: string;
  data_points?: string[];
};

/** A chat message that may carry an additional context object. */
export type ChatMessage = Message & {
  context?: ChatMessageContext;
};

/** Shape of a complete (non-streamed) chat response. */
export type ChatResponse = {
  choices: Array<{
    index: number;
    message: ChatMessage;
  }>;
  error?: string;
};

/**
 * Shape of one chunk of a chat response.
 * Each choice carries a `delta` in which every `ChatMessage` field is optional.
 */
export type ChatResponseChunk = {
  choices: Array<{
    index: number;
    delta: Partial<ChatMessage>;
  }>;
  error?: string;
};

// NOTE(review): presumably 'rtr' = retrieve-then-read and 'rrr' = read-retrieve-read;
// confirm against the backend implementation.
export type Approaches = 'rtr' | 'rrr';

/** Accepted retrieval mode identifiers. */
export type RetrievalMode = 'hybrid' | 'vectors' | 'text';

/** Options describing a chat request; also accepts every `ChatRequestOverrides` field. */
export type ChatRequestOptions = {
  messages: Message[];
  stream: boolean;
  approach: Approaches;
  suggestFollowupQuestions: boolean;
  chunkIntervalMs: number;
  apiUrl: string;
} & ChatRequestOverrides;

/**
 * Optional request overrides. All fields are optional.
 * NOTE(review): how each override is interpreted is decided by the code consuming
 * these options, which is not part of this file.
 */
export type ChatRequestOverrides = {
  retrievalMode?: RetrievalMode;
  semanticRanker?: boolean;
  semanticCaptions?: boolean;
  excludeCategory?: string;
  top?: number;
  temperature?: number;
  promptTemplate?: string;
  promptTemplatePrefix?: string;
  promptTemplateSuffix?: string;
};
60 | 


--------------------------------------------------------------------------------
/src/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/src/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "esnext",
 4 |     "module": "esnext",
 5 |     "lib": ["esnext", "DOM", "DOM.Iterable"],
 6 |     "strict": true,
 7 |     "outDir": "./dist",
 8 |     "rootDir": "./src",
 9 |     "declaration": true,
10 |     "declarationMap": true,
11 |     "sourceMap": true,
12 |     "inlineSources": true,
13 |     "noUnusedLocals": true,
14 |     "noUnusedParameters": true,
15 |     "noImplicitReturns": true,
16 |     "noFallthroughCasesInSwitch": true,
17 |     "noImplicitAny": false,
18 |     "noImplicitThis": true,
19 |     "moduleResolution": "node",
20 |     "allowSyntheticDefaultImports": true,
21 |     "experimentalDecorators": true,
22 |     "forceConsistentCasingInFileNames": true,
23 |     "noImplicitOverride": true,
24 |     "emitDeclarationOnly": true,
25 |     "useDefineForClassFields": false,
26 |     "plugins": [
27 |       {
28 |         "name": "ts-lit-plugin",
29 |         "strict": true
30 |       }
31 |     ]
32 |   },
33 |   "include": ["src/**/*.ts"]
34 | }
35 | 


--------------------------------------------------------------------------------
/src/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite';
 2 | 
 3 | // Expose environment variables to the client
 4 | process.env.VITE_BACKEND_API_URI = process.env.BACKEND_API_URI ?? '';
 5 | console.log(`Using chat API base URL: "${process.env.VITE_BACKEND_API_URI}"`);
 6 | 
 7 | export default defineConfig({
 8 |   build: {
 9 |     outDir: './dist',
10 |     emptyOutDir: true,
11 |     sourcemap: true,
12 |     rollupOptions: {
13 |       output: {
14 |         manualChunks: (id) => {
15 |           if (id.includes('node_modules')) {
16 |             return 'vendor';
17 |           }
18 |         },
19 |       },
20 |     },
21 |   },
22 |   server: {
23 |     proxy: {
24 |       '/chat': 'http://127.0.0.1:3000',
25 |     },
26 |   },
27 | });
28 | 


--------------------------------------------------------------------------------
/src/indexer/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax=docker/dockerfile:1
 2 | 
 3 | # Build Node.js app
 4 | # ------------------------------------
 5 | FROM node:18-alpine as build
 6 | WORKDIR /app
 7 | COPY ./package*.json ./
 8 | COPY ./src/indexer ./src/indexer
 9 | RUN npm ci --cache /tmp/empty-cache
10 | RUN npm run build --workspace=indexer
11 | 
12 | # Run Node.js app
13 | # ------------------------------------
14 | FROM node:18-alpine
15 | ENV NODE_ENV=production
16 | 
17 | WORKDIR /app
18 | COPY ./package*.json ./
19 | COPY ./src/indexer/package.json ./src/indexer/
20 | RUN npm ci --omit=dev --workspace=indexer --cache /tmp/empty-cache
21 | COPY --from=build app/src/indexer/dist src/indexer/dist
22 | EXPOSE 3001
23 | CMD [ "npm", "start", "--workspace=indexer" ]
24 | 


--------------------------------------------------------------------------------
/src/indexer/README.md:
--------------------------------------------------------------------------------
 1 | # Document indexer
 2 | 
 3 | This project was bootstrapped with [Fastify-CLI](https://www.npmjs.com/package/fastify-cli).
 4 | 
 5 | ## Available Scripts
 6 | 
 7 | In the project directory, you can run:
 8 | 
 9 | ### `npm run dev`
10 | 
11 | To start the app in dev mode.\
12 | Open [http://localhost:3001](http://localhost:3001) to view it in the browser.
13 | 
14 | ### `npm run build`
15 | 
16 | To build the app for production to the `dist` folder.
17 | 
18 | ### `npm start`
19 | 
20 | To run the app in production mode.
21 | 
22 | ## Learn More
23 | 
24 | To learn Fastify, check out the [Fastify documentation](https://www.fastify.io/docs/latest/).
25 | 


--------------------------------------------------------------------------------
/src/indexer/bin/index-files.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | import process from 'node:process';
3 | import { run } from '../dist/lib/cli.js';
4 | 
5 | run(process.argv);
6 | 


--------------------------------------------------------------------------------
/src/indexer/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "indexer",
 3 |   "version": "1.0.0",
 4 |   "description": "Document indexer service",
 5 |   "private": true,
 6 |   "type": "module",
 7 |   "exports": "./src/app.ts",
 8 |   "directories": {
 9 |     "test": "test"
10 |   },
11 |   "bin": {
12 |     "index-files": "./bin/index-files.js"
13 |   },
14 |   "scripts": {
15 |     "start": "fastify start -l info dist/app.js -p 3001",
16 |     "build": "tsc",
17 |     "watch": "tsc -w",
18 |     "dev": "npm run build && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"npm:watch\" \"npm:dev:start\"",
19 |     "dev:start": "fastify start --pretty-logs --ignore-watch=.ts$ -w -l debug -p 3001 dist/app.js",
20 |     "docker:build": "docker build --tag indexer --file ./Dockerfile ../..",
21 |     "docker:run": "docker run --rm --publish 3001:3001 --env-file ../../.env indexer",
22 |     "clean": "npx rimraf dist"
23 |   },
24 |   "dependencies": {
25 |     "@azure/identity": "4.0.0",
26 |     "@azure/search-documents": "12.0.0-beta.3",
27 |     "@dqbd/tiktoken": "^1.0.7",
28 |     "@fastify/autoload": "^5.0.0",
29 |     "@fastify/cors": "^8.3.0",
30 |     "@fastify/multipart": "^8.0.0",
31 |     "@fastify/sensible": "^5.0.0",
32 |     "@fastify/type-provider-json-schema-to-ts": "^2.2.2",
33 |     "commander": "^11.0.0",
34 |     "dotenv": "^16.3.1",
35 |     "fastify": "^4.22.2",
36 |     "fastify-cli": "^5.7.0",
37 |     "fastify-plugin": "^4.0.0",
38 |     "mime": "^3.0.0",
39 |     "openai": "^4.4.0",
40 |     "pdfjs-dist": "^4.0.189"
41 |   },
42 |   "devDependencies": {
43 |     "@types/mime": "^3.0.1",
44 |     "@types/node": "^18.0.0",
45 |     "concurrently": "^8.2.0",
46 |     "fastify-tsconfig": "^2.0.0",
47 |     "ts-node": "^10.9.1",
48 |     "typescript": "^5.1.6"
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/indexer/src/app.ts:
--------------------------------------------------------------------------------
 1 | import path, { join } from 'node:path';
 2 | import { fileURLToPath } from 'node:url';
 3 | import { type FastifyPluginAsync } from 'fastify';
 4 | import AutoLoad, { type AutoloadPluginOptions } from '@fastify/autoload';
 5 | import cors from '@fastify/cors';
 6 | 
 7 | export type AppOptions = {
 8 |   // Place your custom options for app below here.
 9 | } & Partial<AutoloadPluginOptions>;
10 | 
11 | // Pass --options via CLI arguments in command to enable these options.
12 | const options: AppOptions = {};
13 | 
14 | const __filename = fileURLToPath(import.meta.url);
15 | const __dirname = path.dirname(__filename);
16 | 
/**
 * Root Fastify plugin of the indexer service.
 *
 * Registers CORS with default settings, then auto-loads every plugin found in the
 * `plugins` folder followed by every route found in the `routes` folder, forwarding
 * the plugin options to each of them.
 */
const app: FastifyPluginAsync<AppOptions> = async (fastify, options_): Promise<void> => {
  // Custom registrations go here, before the auto-loaded folders.
  fastify.register(cors, {});

  // Order matters: support plugins (reused across the application) are loaded
  // before the route definitions.
  for (const folder of ['plugins', 'routes']) {
    fastify.register(AutoLoad, {
      dir: join(__dirname, folder),
      options: options_,
    });
  }
};
39 | 
40 | export default app;
41 | export { app, options };
42 | 


--------------------------------------------------------------------------------
/src/indexer/src/lib/cli.ts:
--------------------------------------------------------------------------------
  1 | import process from 'node:process';
  2 | import fs from 'node:fs/promises';
  3 | import { fileURLToPath } from 'node:url';
  4 | import { join, dirname, extname } from 'node:path';
  5 | import { type OptionValues, program } from 'commander';
  6 | import * as dotenv from 'dotenv';
  7 | import mime from 'mime/lite.js';
  8 | 
/** Options controlling how files are sent to the indexer service. */
export interface IndexFilesOptions {
  /** Base URL of the indexer service; requests are sent to paths under `/indexes`. */
  indexerUrl: string;
  /** Name of the target search index. */
  indexName?: string;
  /** Category forwarded with each uploaded file. */
  category?: string;
  /** Forwarded with each uploaded file as the `useVectors` option. */
  useVectors: boolean;
  /** Forwarded with each uploaded file as the `wait` option. */
  wait: boolean;
}
 16 | 
 17 | const __dirname = dirname(fileURLToPath(import.meta.url));
 18 | 
 19 | export async function run(arguments_: string[] = process.argv) {
 20 |   dotenv.config();
 21 | 
 22 |   const file = await fs.readFile(join(__dirname, '../../package.json'), 'utf8');
 23 |   const packageJson = JSON.parse(file) as Record<string, string>;
 24 | 
 25 |   program
 26 |     .name('index-files')
 27 |     .arguments('<files...>')
 28 |     .description('CLI utility to send files to an indexer service instance')
 29 |     .option('-u, --indexer-url <url>', 'The indexer service URL', 'http://localhost:3001')
 30 |     .option('-i, --index-name <name>', 'The name of the target index', process.env.AZURE_SEARCH_INDEX || 'kbindex')
 31 |     .option('-c, --category <name>', 'Set document category')
 32 |     .option('-w, --wait', 'Wait for the indexer to finish processing the files', false)
 33 |     .option('--no-vectors', 'Disable vectors generation for the files')
 34 |     .version(packageJson.version, '-v, --version', 'Show the current version')
 35 |     .showHelpAfterError()
 36 |     .action(async (files: string[], options: OptionValues) => {
 37 |       const { indexerUrl, indexName, vectors, wait } = options;
 38 |       await indexFiles(files, {
 39 |         indexerUrl,
 40 |         indexName,
 41 |         useVectors: vectors,
 42 |         wait,
 43 |       });
 44 |     });
 45 |   program.parse(arguments_);
 46 | }
 47 | 
 48 | export async function indexFiles(files: string[], options: IndexFilesOptions) {
 49 |   try {
 50 |     if (!options.indexName) {
 51 |       throw new Error('Index name is required');
 52 |     }
 53 |     console.log(`Indexing ${files.length} file(s)...`);
 54 |     await ensureSearchIndex(options);
 55 | 
 56 |     for (const file of files) {
 57 |       await indexFile(file, options);
 58 |     }
 59 | 
 60 |     console.log('Completed.');
 61 |   } catch (_error: unknown) {
 62 |     const error = _error as Error;
 63 |     console.error(`Error indexing files: ${error.message}`);
 64 |     process.exitCode = 1;
 65 |   }
 66 | }
 67 | 
 68 | async function ensureSearchIndex(options: IndexFilesOptions) {
 69 |   const { indexerUrl, indexName } = options;
 70 |   const response = await fetch(`${indexerUrl}/indexes`, {
 71 |     method: 'POST',
 72 |     headers: {
 73 |       'Content-Type': 'application/json',
 74 |     },
 75 |     body: JSON.stringify({
 76 |       name: indexName?.trim(),
 77 |     }),
 78 |   });
 79 |   if (!response.ok) {
 80 |     const errorDetails = (await response.json()) as any;
 81 |     throw new Error(`Index creating "${indexName}": ${errorDetails.message}`);
 82 |   }
 83 | }
 84 | 
 85 | async function indexFile(file: string, options: IndexFilesOptions) {
 86 |   console.log(`Indexing file "${file}"...`);
 87 |   const { indexerUrl, indexName, category, useVectors, wait } = options;
 88 |   const formData = new FormData();
 89 |   const fileIndexOptions = {
 90 |     category,
 91 |     useVectors,
 92 |     wait,
 93 |   };
 94 |   const type = mime.getType(extname(file)) ?? 'application/octet-stream';
 95 |   const fileData = await fs.readFile(file);
 96 |   formData.append('file', new Blob([fileData], { type }), file);
 97 |   formData.append('options', JSON.stringify(fileIndexOptions));
 98 |   const response = await fetch(`${indexerUrl}/indexes/${indexName}/files`, {
 99 |     method: 'POST',
100 |     body: formData,
101 |   });
102 |   if (!response.ok) {
103 |     const errorDetails = (await response.json()) as any;
104 |     throw new Error(`Error indexing file "${file}": ${errorDetails.message}`);
105 |   }
106 |   console.log(`File "${file}" indexed successfully`);
107 | }
108 | 


--------------------------------------------------------------------------------
/src/indexer/src/lib/document-processor.ts:
--------------------------------------------------------------------------------
  1 | import path from 'node:path';
  2 | import { type BaseLogger } from 'pino';
  3 | import * as pdfjs from 'pdfjs-dist';
  4 | import { type TextItem } from 'pdfjs-dist/types/src/display/api.js';
  5 | 
/** A processed file: its identity plus the sections it was split into. */
export interface Document {
  filename: string;
  type: string;
  category: string;
  sections: Section[];
}

/** One indexable chunk of a document. */
export interface Section {
  /** Unique identifier of the section. */
  id: string;
  /** Text of the section. */
  content: string;
  category: string;
  sourcepage: string;
  sourcefile: string;
  /** Embedding vector; optional. */
  embedding?: number[];
}

/** Text of one page along with where it starts in the whole document text. */
export interface ContentPage {
  content: string;
  /** Character offset of the page within the concatenated text of all pages. */
  offset: number;
  /** Page number. */
  page: number;
}

/** A chunk of text and the page on which it starts. */
export interface ContentSection {
  content: string;
  page: number;
}

// Characters treated as the end of a sentence when looking for section boundaries.
const SENTENCE_ENDINGS = new Set(['.', '!', '?']);
// Characters treated as word separators when no sentence boundary is found.
const WORD_BREAKS = new Set([',', ';', ':', ' ', '(', ')', '[', ']', '{', '}', '\t', '\n']);
// Target section size, in characters, before looking for a sentence boundary.
const MAX_SECTION_LENGTH = 1000;
// How many characters to scan around a section boundary for a sentence ending.
const SENTENCE_SEARCH_LIMIT = 100;
// Number of characters shared between the end of a section and the start of the next.
const SECTION_OVERLAP = 100;
 38 | 
 39 | export class DocumentProcessor {
 40 |   constructor(private logger: BaseLogger) {}
 41 | 
 42 |   async createDocumentFromFile(filename: string, data: Buffer, type: string, category: string) {
 43 |     const pages = await this.extractText(data, type);
 44 |     const contentSections = this.splitPages(filename, pages);
 45 |     const sections = await this.createSections(filename, contentSections, category);
 46 |     return { filename, type, category, sections };
 47 |   }
 48 | 
 49 |   private async extractText(data: Buffer, type: string): Promise<ContentPage[]> {
 50 |     const pages: ContentPage[] = [];
 51 |     if (type === 'text/plain' || type === 'text/markdown') {
 52 |       const text = data.toString('utf8');
 53 |       pages.push({ content: text, offset: 0, page: 0 });
 54 |     } else if (type === 'application/pdf') {
 55 |       const pdfContent = await extractTextFromPdf(data);
 56 |       pages.push(...pdfContent);
 57 |     } else {
 58 |       // You can add support for other file types here
 59 |       throw new Error(`Unsupported file type: ${type}`);
 60 |     }
 61 | 
 62 |     return pages;
 63 |   }
 64 | 
 65 |   private async createSections(filename: string, contentSections: ContentSection[], category: string) {
 66 |     const fileId = filenameToId(filename);
 67 |     const sections: Section[] = [];
 68 | 
 69 |     for (const [index, { content, page }] of contentSections.entries()) {
 70 |       const section: Section = {
 71 |         id: `${fileId}-page-${page}-section-${index}`,
 72 |         content,
 73 |         category: category,
 74 |         sourcepage: path.basename(filename),
 75 |         sourcefile: filename,
 76 |       };
 77 | 
 78 |       sections.push(section);
 79 |     }
 80 |     return sections;
 81 |   }
 82 | 
 83 |   private splitPages(filename: string, pages: ContentPage[]): ContentSection[] {
 84 |     this.logger.debug(`Splitting '${filename}' into sections`);
 85 | 
 86 |     const findPage = (offset: number): number => {
 87 |       const pageCount = pages.length;
 88 |       for (let i = 0; i < pageCount - 1; i++) {
 89 |         if (offset >= pages[i].offset && offset < pages[i + 1].offset) {
 90 |           return pages[i].page;
 91 |         }
 92 |       }
 93 |       return pages[pageCount - 1].page;
 94 |     };
 95 | 
 96 |     const contentSections: ContentSection[] = [];
 97 |     const allText = pages.map((page) => page.content).join('');
 98 |     const length = allText.length;
 99 |     let start = 0;
100 |     let end = length;
101 | 
102 |     while (start + SECTION_OVERLAP < length) {
103 |       let lastWord = -1;
104 |       end = start + MAX_SECTION_LENGTH;
105 | 
106 |       if (end > length) {
107 |         end = length;
108 |       } else {
109 |         // Try to find the end of the sentence
110 |         while (
111 |           end < length &&
112 |           end - start - MAX_SECTION_LENGTH < SENTENCE_SEARCH_LIMIT &&
113 |           !SENTENCE_ENDINGS.has(allText[end])
114 |         ) {
115 |           if (WORD_BREAKS.has(allText[end])) {
116 |             lastWord = end;
117 |           }
118 |           end += 1;
119 |         }
120 |         if (end < length && !SENTENCE_ENDINGS.has(allText[end]) && lastWord > 0) {
121 |           end = lastWord; // Fall back to at least keeping a whole word
122 |         }
123 |         if (end < length) {
124 |           end += 1;
125 |         }
126 |       }
127 | 
128 |       // Try to find the start of the sentence or at least a whole word boundary
129 |       lastWord = -1;
130 |       while (
131 |         start > 0 &&
132 |         start > end - MAX_SECTION_LENGTH - 2 * SENTENCE_SEARCH_LIMIT &&
133 |         !SENTENCE_ENDINGS.has(allText[start])
134 |       ) {
135 |         if (WORD_BREAKS.has(allText[start])) {
136 |           lastWord = start;
137 |         }
138 |         start -= 1;
139 |       }
140 |       if (!SENTENCE_ENDINGS.has(allText[start]) && lastWord > 0) {
141 |         start = lastWord;
142 |       }
143 |       if (start > 0) {
144 |         start += 1;
145 |       }
146 | 
147 |       const sectionText = allText.slice(start, end);
148 |       contentSections.push({ page: findPage(start), content: sectionText });
149 | 
150 |       const lastTableStart = sectionText.lastIndexOf('<table');
151 |       if (lastTableStart > 2 * SENTENCE_SEARCH_LIMIT && lastTableStart > sectionText.lastIndexOf('</table')) {
152 |         // If the section ends with an unclosed table, we need to start the next section with the table.
153 |         // If table starts inside SENTENCE_SEARCH_LIMIT, we ignore it, as that will cause an infinite loop for tables longer than MAX_SECTION_LENGTH
154 |         // If last table starts inside SECTION_OVERLAP, keep overlapping
155 |         const page = findPage(start);
156 |         this.logger.debug(
157 |           `Section ends with unclosed table, starting next section with the table at page ${page} offset ${start} table start ${lastTableStart}`,
158 |         );
159 |         start = Math.min(end - SECTION_OVERLAP, start + lastTableStart);
160 |       } else {
161 |         start = end - SECTION_OVERLAP;
162 |       }
163 |     }
164 | 
165 |     if (start + SECTION_OVERLAP < end) {
166 |       contentSections.push({ content: allText.slice(start, end), page: findPage(start) });
167 |     }
168 | 
169 |     return contentSections;
170 |   }
171 | }
172 | 
/**
 * Derives an identifier from a file name.
 *
 * The id has three dash-separated parts: the literal `file`, the file name with
 * every character other than ASCII letters, digits, `_` and `-` replaced by `_`,
 * and the hexadecimal encoding of the file name's UTF-8 bytes.
 *
 * @param filename The file name to convert.
 * @returns The identifier string.
 */
function filenameToId(filename: string) {
  const hexEncodedName = Buffer.from(filename, 'utf8').toString('hex');
  const sanitizedName = filename.replaceAll(/[^\w-]/g, '_');
  return ['file', sanitizedName, hexEncodedName].join('-');
}
178 | 
/**
 * Extracts the text of every page of a PDF.
 *
 * Text items of a page are concatenated; a line break is inserted whenever the
 * vertical position of an item differs from the previous one. Each page's content
 * ends with a line break, and `offset` is the position of the page within the
 * concatenation of all page contents.
 *
 * @param data Raw PDF content.
 * @returns One entry per page, numbered from 1.
 */
async function extractTextFromPdf(data: Buffer): Promise<ContentPage[]> {
  const pages: ContentPage[] = [];
  const pdf = await pdfjs.getDocument(new Uint8Array(data)).promise;
  let offset = 0;

  for (let i = 1; i <= pdf.numPages; i++) {
    const page = await pdf.getPage(i);
    const textContent = await page.getTextContent();
    let previousY = 0;
    const text = textContent.items
      .filter((item) => 'str' in item)
      .map((item) => {
        const text = item as TextItem;
        // transform[5] is read as the vertical position of the text item.
        const y = text.transform[5];
        let string_ = text.str;
        if (y !== previousY && previousY !== 0) {
          string_ = '\n' + string_;
        }
        previousY = y;
        return string_;
      })
      .join('');

    // Advance the offset by the full stored content, including the trailing line
    // break, so offsets stay aligned with the concatenated text of all pages.
    const content = text + '\n';
    pages.push({ content, offset, page: i });
    offset += content.length;
  }
  return pages;
}
207 | 


--------------------------------------------------------------------------------
/src/indexer/src/lib/index.ts:
--------------------------------------------------------------------------------
1 | export * from './cli.js';
2 | export * from './document-processor.js';
3 | export * from './indexer.js';
4 | export * from './model-limits.js';
5 | 


--------------------------------------------------------------------------------
/src/indexer/src/lib/indexer.ts:
--------------------------------------------------------------------------------
  1 | import { type BaseLogger } from 'pino';
  2 | import { type SearchIndex } from '@azure/search-documents';
  3 | import { encoding_for_model, type TiktokenModel } from '@dqbd/tiktoken';
  4 | import { type AzureClients } from '../plugins/azure.js';
  5 | import { type OpenAiService } from '../plugins/openai.js';
  6 | import { DocumentProcessor, type Section } from './document-processor.js';
  7 | import { MODELS_SUPPORTED_BATCH_SIZE } from './model-limits.js';
  8 | 
/** Options for indexing a single file. */
export interface IndexFileOptions {
  /** When true, embeddings are computed for the sections before upload. */
  useVectors?: boolean;
  /** When true, indexing errors are rethrown instead of only being logged. */
  throwErrors?: boolean;
}

/** Description of a file to index. */
export interface FileInfos {
  filename: string;
  /** Raw file content. */
  data: Buffer;
  /** MIME type of the file. */
  type: string;
  category: string;
}

// Maximum number of sections uploaded to the search index in one call.
const INDEXING_BATCH_SIZE = 1000;
 22 | 
 23 | export class Indexer {
 24 |   constructor(
 25 |     private logger: BaseLogger,
 26 |     private azure: AzureClients,
 27 |     private openai: OpenAiService,
 28 |     private embeddingModelName: string = 'text-embedding-ada-002',
 29 |   ) {}
 30 | 
 31 |   async createSearchIndex(indexName: string) {
 32 |     this.logger.debug(`Ensuring search index "${indexName}" exists`);
 33 | 
 34 |     const searchIndexClient = this.azure.searchIndex;
 35 | 
 36 |     const names: string[] = [];
 37 |     const indexNames = await searchIndexClient.listIndexes();
 38 |     for await (const index of indexNames) {
 39 |       names.push(index.name);
 40 |     }
 41 |     if (names.includes(indexName)) {
 42 |       this.logger.debug(`Search index "${indexName}" already exists`);
 43 |     } else {
 44 |       const index: SearchIndex = {
 45 |         name: indexName,
 46 |         fields: [
 47 |           {
 48 |             name: 'id',
 49 |             type: 'Edm.String',
 50 |             key: true,
 51 |           },
 52 |           {
 53 |             name: 'content',
 54 |             type: 'Edm.String',
 55 |             searchable: true,
 56 |             analyzerName: 'en.microsoft',
 57 |           },
 58 |           {
 59 |             name: 'embedding',
 60 |             type: 'Collection(Edm.Single)',
 61 |             hidden: false,
 62 |             searchable: true,
 63 |             filterable: false,
 64 |             sortable: false,
 65 |             facetable: false,
 66 |             vectorSearchDimensions: 1536,
 67 |             vectorSearchConfiguration: 'default',
 68 |           },
 69 |           {
 70 |             name: 'category',
 71 |             type: 'Edm.String',
 72 |             filterable: true,
 73 |             facetable: true,
 74 |           },
 75 |           {
 76 |             name: 'sourcepage',
 77 |             type: 'Edm.String',
 78 |             filterable: true,
 79 |             facetable: true,
 80 |           },
 81 |           {
 82 |             name: 'sourcefile',
 83 |             type: 'Edm.String',
 84 |             filterable: true,
 85 |             facetable: true,
 86 |           },
 87 |         ],
 88 |         semanticSettings: {
 89 |           configurations: [
 90 |             {
 91 |               name: 'default',
 92 |               prioritizedFields: {
 93 |                 prioritizedContentFields: [{ name: 'content' }],
 94 |               },
 95 |             },
 96 |           ],
 97 |         },
 98 |         vectorSearch: {
 99 |           algorithmConfigurations: [
100 |             {
101 |               name: 'default',
102 |               kind: 'hnsw',
103 |               parameters: {
104 |                 metric: 'cosine',
105 |               },
106 |             },
107 |           ],
108 |         },
109 |       };
110 |       this.logger.debug(`Creating "${indexName}" search index...`);
111 |       await searchIndexClient.createIndex(index);
112 |     }
113 |   }
114 | 
115 |   async deleteSearchIndex(indexName: string) {
116 |     this.logger.debug(`Deleting search index "${indexName}"`);
117 |     const searchIndexClient = this.azure.searchIndex;
118 |     await searchIndexClient.deleteIndex(indexName);
119 |   }
120 | 
121 |   async indexFile(indexName: string, fileInfos: FileInfos, options: IndexFileOptions = {}) {
122 |     const { filename, data, type, category } = fileInfos;
123 |     this.logger.debug(`Indexing file "${filename}" into search index "${indexName}..."`);
124 | 
125 |     try {
126 |       const documentProcessor = new DocumentProcessor(this.logger);
127 |       const document = await documentProcessor.createDocumentFromFile(filename, data, type, category);
128 |       const sections = document.sections;
129 |       if (options.useVectors) {
130 |         await this.updateEmbeddingsInBatch(sections);
131 |       }
132 | 
133 |       const searchClient = this.azure.searchIndex.getSearchClient(indexName);
134 | 
135 |       const batchSize = INDEXING_BATCH_SIZE;
136 |       let batch: Section[] = [];
137 | 
138 |       for (let index = 0; index < sections.length; index++) {
139 |         batch.push(sections[index]);
140 | 
141 |         if (batch.length === batchSize || index === sections.length - 1) {
142 |           const { results } = await searchClient.uploadDocuments(batch);
143 |           const succeeded = results.filter((r) => r.succeeded).length;
144 |           const indexed = batch.length;
145 |           this.logger.debug(`Indexed ${indexed} sections, ${succeeded} succeeded`);
146 |           batch = [];
147 |         }
148 |       }
149 |     } catch (_error: unknown) {
150 |       const error = _error as Error;
151 |       if (options.throwErrors) {
152 |         throw error;
153 |       } else {
154 |         this.logger.error(`Error indexing file "${filename}": ${error.message}`);
155 |       }
156 |     }
157 |   }
158 | 
159 |   async deleteFromIndex(indexName: string, filename?: string) {
160 |     this.logger.debug(`Removing sections from "${filename ?? '<all>'}" from search index "${indexName}"`);
161 |     const searchClient = this.azure.searchIndex.getSearchClient(indexName);
162 | 
163 |     // eslint-disable-next-line no-constant-condition
164 |     while (true) {
165 |       const filter = filename ? `sourcefile eq '${filename}'` : undefined;
166 |       const r = await searchClient.search('', { filter: filter, top: 1000, includeTotalCount: true });
167 |       if (r.count === 0) {
168 |         break;
169 |       }
170 |       const documents: any[] = [];
171 |       for await (const d of r.results) {
172 |         documents.push({ id: (d.document as any).id });
173 |       }
174 | 
175 |       const { results } = await searchClient.deleteDocuments(documents);
176 |       this.logger.debug(`Removed ${results.length} sections from index`);
177 | 
178 |       // It can take a few seconds for search results to reflect changes, so wait a bit
179 |       await wait(2000);
180 |     }
181 |   }
182 | 
183 |   async createEmbedding(text: string): Promise<number[]> {
184 |     // TODO: add retry
185 |     const embeddingsClient = await this.openai.getEmbeddings();
186 |     const result = await embeddingsClient.create({ input: text, model: this.embeddingModelName });
187 |     return result.data[0].embedding;
188 |   }
189 | 
190 |   async createEmbeddingsInBatch(texts: string[]): Promise<Array<number[]>> {
191 |     // TODO: add retry
192 |     const embeddingsClient = await this.openai.getEmbeddings();
193 |     const result = await embeddingsClient.create({ input: texts, model: this.embeddingModelName });
194 |     return result.data.map((d) => d.embedding);
195 |   }
196 | 
197 |   // Embeds all sections in batches kept within the model's token/size limits; sets `section.embedding` in place.
198 |   async updateEmbeddingsInBatch(sections: Section[]): Promise<Section[]> {
199 |     const limits = MODELS_SUPPORTED_BATCH_SIZE[this.embeddingModelName];
200 |     if (sections.length > 0 && !limits) throw new Error(`No batch limits for model "${this.embeddingModelName}"`);
201 |     let batch: Section[] = [];
202 |     let tokenCount = 0;
203 |     // Embeds the pending batch (if any) and resets the accumulators
204 |     const flush = async () => {
205 |       if (batch.length === 0) return;
206 |       const embeddings = await this.createEmbeddingsInBatch(batch.map((section) => section.content));
207 |       for (const [index, section] of batch.entries()) section.embedding = embeddings[index];
208 |       this.logger.debug(`Batch Completed. Batch size ${batch.length} Token count ${tokenCount}`);
209 |       batch = [];
210 |       tokenCount = 0;
211 |     };
212 |     for (const section of sections) {
213 |       const sectionTokens = getTokenCount(section.content, this.embeddingModelName);
214 |       // Flush first when this section would take the batch over the token or size limit
215 |       if (tokenCount + sectionTokens > limits.tokenLimit || batch.length >= limits.maxBatchSize) await flush();
216 |       batch.push(section);
217 |       tokenCount += sectionTokens;
218 |     }
219 |     // Send whatever is still queued after the last section
220 |     await flush();
221 |     return sections;
222 |   }
222 | }
223 | 
224 | // Counts the tokens of `input` using the tiktoken encoding for `model`.
225 | export function getTokenCount(input: string, model: string): number {
226 |   const encoder = encoding_for_model(model as TiktokenModel);
227 |   try {
228 |     return encoder.encode(input).length;
229 |   } finally {
230 |     encoder.free(); // release the encoder even when encode() throws
231 |   }
232 | }
230 | 
231 | export async function wait(ms: number): Promise<void> {
232 |   await new Promise<void>((done) => setTimeout(done, ms)); // resolves once the timer fires
233 | }
234 | 


--------------------------------------------------------------------------------
/src/indexer/src/lib/model-limits.ts:
--------------------------------------------------------------------------------
 1 | export interface ModelLimit { // batching limits of one embedding model
 2 |   tokenLimit: number; // token budget checked against the summed token count of a batch
 3 |   maxBatchSize: number; // number of queued sections at which a batch is sent
 4 | }
 5 | 
 6 | export const MODELS_SUPPORTED_BATCH_SIZE: Record<string, ModelLimit> = { // keyed by embedding model name
 7 |   'text-embedding-ada-002': {
 8 |     tokenLimit: 8100,
 9 |     maxBatchSize: 16,
10 |   },
11 | };
12 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/README.md:
--------------------------------------------------------------------------------
 1 | # Plugins Folder
 2 | 
 3 | Plugins define behavior that is common to all the routes in your
 4 | application. Authentication, caching, templates, and all the other
 5 | cross-cutting concerns should be handled by plugins placed in this folder.
 6 | 
 7 | Files in this folder are typically defined through the
 8 | [`fastify-plugin`](https://github.com/fastify/fastify-plugin) module,
 9 | making them non-encapsulated. They can define decorators and set hooks
10 | that will then be used in the rest of your application.
11 | 
12 | Check out:
13 | 
14 | - [The hitchhiker's guide to plugins](https://www.fastify.io/docs/latest/Guides/Plugins-Guide/)
15 | - [Fastify decorators](https://www.fastify.io/docs/latest/Reference/Decorators/)
16 | - [Fastify lifecycle](https://www.fastify.io/docs/latest/Reference/Lifecycle/)
17 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/azure.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import { DefaultAzureCredential } from '@azure/identity';
 3 | import { SearchIndexClient } from '@azure/search-documents';
 4 | 
 5 | export type AzureClients = { // clients exposed to the app as `fastify.azure`
 6 |   credential: DefaultAzureCredential; // credential shared by the Azure clients
 7 |   searchIndex: SearchIndexClient; // client created for the configured search service
 8 | };
 9 | 
10 | export default fp(
11 |   async (fastify, _options) => {
12 |     // Authenticate with the current user identity rather than secrets: 'az login' locally,
13 |     // managed identity when deployed on Azure. If you need to use keys, create separate
14 |     // AzureKeyCredential instances with the keys for each service instead.
15 |     const credential = new DefaultAzureCredential();
16 | 
17 |     // Search client pointed at the service named in the app config
18 |     const { azureSearchService } = fastify.config;
19 |     const endpoint = `https://${azureSearchService}.search.windows.net`;
20 |     const searchIndex = new SearchIndexClient(endpoint, credential);
21 | 
22 |     // Make both available to the rest of the app as `fastify.azure`
23 |     const clients: AzureClients = {
24 |       credential,
25 |       searchIndex,
26 |     };
27 | 
28 |     fastify.decorate('azure', clients);
29 |   },
30 |   {
31 |     name: 'azure',
32 |     dependencies: ['config'],
33 |   },
34 | );
35 | 
36 | // When using .decorate you have to specify added properties for Typescript
37 | declare module 'fastify' {
38 |   export interface FastifyInstance {
39 |     azure: AzureClients; // set by the 'azure' plugin through fastify.decorate
40 |   }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/config.ts:
--------------------------------------------------------------------------------
 1 | import process from 'node:process';
 2 | import path from 'node:path';
 3 | import * as dotenv from 'dotenv';
 4 | import fp from 'fastify-plugin';
 5 | 
 6 | export interface AppConfig { // settings read from environment variables by the 'config' plugin
 7 |   azureSearchService: string; // AZURE_SEARCH_SERVICE, no default
 8 |   azureSearchIndex: string; // AZURE_SEARCH_INDEX, defaults to 'kbindex'
 9 |   azureOpenAiUrl: string; // AZURE_OPENAI_URL, no default
10 |   azureOpenAiEmbeddingDeployment: string; // AZURE_OPENAI_EMBEDDING_DEPLOYMENT, defaults to 'embedding'
11 |   azureOpenAiEmbeddingModel: string; // AZURE_OPENAI_EMBEDDING_MODEL, defaults to 'text-embedding-ada-002'
12 |   kbFieldsContent: string; // KB_FIELDS_CONTENT, defaults to 'content'
13 |   kbFieldsSourcePage: string; // KB_FIELDS_SOURCEPAGE, defaults to 'sourcepage'
14 | }
15 | 
16 | const camelCaseToUpperSnakeCase = (s: string) => s.replace(/[A-Z]/g, (upper) => '_' + upper).toUpperCase();
17 | 
18 | export default fp(
19 |   async (fastify, _options) => {
20 |     // The .env file is looked up two directories above the current working directory
21 |     const environmentPath = path.resolve(process.cwd(), '../../.env');
22 |     console.log(`Loading .env config from ${environmentPath}...`);
23 |     dotenv.config({ path: environmentPath });
24 | 
25 |     const { env } = process;
26 |     const config: AppConfig = {
27 |       azureSearchService: env.AZURE_SEARCH_SERVICE || '',
28 |       azureSearchIndex: env.AZURE_SEARCH_INDEX || 'kbindex',
29 |       azureOpenAiUrl: env.AZURE_OPENAI_URL || '',
30 |       azureOpenAiEmbeddingDeployment: env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT || 'embedding',
31 |       azureOpenAiEmbeddingModel: env.AZURE_OPENAI_EMBEDDING_MODEL || 'text-embedding-ada-002',
32 |       kbFieldsContent: env.KB_FIELDS_CONTENT || 'content',
33 |       kbFieldsSourcePage: env.KB_FIELDS_SOURCEPAGE || 'sourcepage',
34 |     };
35 | 
36 |     // Fail on the first setting that ended up empty, naming its environment variable
37 |     const missing = Object.entries(config).find(([, value]) => !value);
38 |     if (missing) {
39 |       const variableName = camelCaseToUpperSnakeCase(missing[0]).replace('OPEN_AI', 'OPENAI');
40 |       const message = `${variableName} environment variable must be set`;
41 |       fastify.log.error(message);
42 |       throw new Error(message);
43 |     }
44 | 
45 |     fastify.decorate('config', config);
46 |   },
47 |   {
48 |     name: 'config',
49 |   },
50 | );
51 | 
52 | // When using .decorate you have to specify added properties for Typescript
53 | declare module 'fastify' {
54 |   export interface FastifyInstance {
55 |     config: AppConfig; // set by the 'config' plugin through fastify.decorate
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/indexer.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import { Indexer } from '../lib/index.js';
 3 | 
 4 | export default fp(
 5 |   async (fastify, _options) => {
 6 |     // Build the indexer from the logger and the decorators of the plugins listed below
 7 |     const { config, log, azure, openai } = fastify;
 8 |     const indexer = new Indexer(log, azure, openai, config.azureOpenAiEmbeddingModel);
 9 | 
10 |     // Expose it to the rest of the app as `fastify.indexer`
11 |     fastify.decorate('indexer', indexer);
12 |   },
13 |   {
14 |     name: 'indexer',
15 |     dependencies: ['config', 'azure', 'openai'],
16 |   },
17 | );
18 | 
19 | // When using .decorate you have to specify added properties for Typescript
20 | declare module 'fastify' {
21 |   export interface FastifyInstance {
22 |     indexer: Indexer; // set by the 'indexer' plugin through fastify.decorate
23 |   }
24 | }
25 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/multipart.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import multipart from '@fastify/multipart';
 3 | 
 4 | const FILE_UPLOAD_LIMIT = 20 * 1024 * 1024; // 20 MB
 5 | 
 6 | export default fp(async (fastify) => {
 7 |   // Accept a single uploaded file per request, capped at FILE_UPLOAD_LIMIT bytes
 8 |   const limits = { fileSize: FILE_UPLOAD_LIMIT, files: 1 };
 9 | 
10 |   fastify.register(multipart, {
11 |     attachFieldsToBody: true,
12 |     sharedSchemaId: 'multipartField',
13 |     limits,
14 |   });
15 | });
16 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/openai.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import { type AccessToken } from '@azure/identity';
 3 | import { OpenAI } from 'openai';
 4 | import { type Embeddings } from 'openai/resources/index';
 5 | 
 6 | export type OpenAiService = { // shape of the `fastify.openai` decorator
 7 |   getEmbeddings(): Promise<Embeddings>; // embeddings API of the client, after a token refresh if needed
 8 |   getApiToken(): Promise<string>; // current access token string, after a token refresh if needed
 9 |   config: {
10 |     apiVersion: string; // value sent as the 'api-version' query parameter
11 |     apiUrl: string; // the configured Azure OpenAI URL
12 |   };
13 | };
14 | 
15 | const AZURE_OPENAI_API_VERSION = '2023-05-15';
16 | const AZURE_COGNITIVE_SERVICES_AD_SCOPE = 'https://cognitiveservices.azure.com/.default';
17 | 
18 | export default fp(
19 |   async (fastify, _options) => {
20 |     const config = fastify.config;
21 | 
22 |     fastify.log.info(`Using OpenAI at ${config.azureOpenAiUrl}`);
23 | 
24 |     // Cached token and the client built from it; both are replaced together on refresh
25 |     let openAiToken: AccessToken;
26 |     let embeddingsClient: OpenAI;
27 | 
28 |     // Gets a new token and rebuilds the client when there is no token yet or when the
29 |     // current one expires within the next minute; otherwise does nothing
30 |     const refreshOpenAiToken = async () => {
31 |       const expired = !openAiToken || openAiToken.expiresOnTimestamp < Date.now() + 60 * 1000;
32 |       if (!expired) {
33 |         return;
34 |       }
35 | 
36 |       openAiToken = await fastify.azure.credential.getToken(AZURE_COGNITIVE_SERVICES_AD_SCOPE);
37 |       const { token } = openAiToken;
38 |       embeddingsClient = new OpenAI({
39 |         apiKey: token,
40 |         defaultQuery: { 'api-version': AZURE_OPENAI_API_VERSION },
41 |         defaultHeaders: { 'api-key': token },
42 |         baseURL: `${config.azureOpenAiUrl}/openai/deployments/${config.azureOpenAiEmbeddingDeployment}`,
43 |       });
44 |     };
45 | 
46 |     const service: OpenAiService = {
47 |       async getEmbeddings() {
48 |         await refreshOpenAiToken();
49 |         return embeddingsClient.embeddings;
50 |       },
51 |       async getApiToken() {
52 |         await refreshOpenAiToken();
53 |         return openAiToken.token;
54 |       },
55 |       config: { apiVersion: AZURE_OPENAI_API_VERSION, apiUrl: config.azureOpenAiUrl },
56 |     };
57 |     fastify.decorate('openai', service);
58 |   },
59 |   {
60 |     name: 'openai',
61 |     dependencies: ['azure', 'config'],
62 |   },
63 | );
64 | 
65 | // When using .decorate you have to specify added properties for Typescript
66 | declare module 'fastify' {
67 |   export interface FastifyInstance {
68 |     openai: OpenAiService; // set by the 'openai' plugin through fastify.decorate
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/indexer/src/plugins/sensible.ts:
--------------------------------------------------------------------------------
 1 | import fp from 'fastify-plugin';
 2 | import sensible, { type SensibleOptions } from '@fastify/sensible';
 3 | 
 4 | /**
 5 |  * This plugin adds some utilities to handle HTTP errors
 6 |  * @see https://github.com/fastify/fastify-sensible
 7 |  */
 8 | export default fp<SensibleOptions>(async (fastify) => {
 9 |   const httpErrorSchema = { // shared error body, referenced by routes as { $ref: 'httpError' }
10 |     $id: 'httpError',
11 |     type: 'object',
12 |     properties: {
13 |       statusCode: { type: 'number' },
14 |       code: { type: 'string' },
15 |       error: { type: 'string' },
16 |       message: { type: 'string' },
17 |     },
18 |   };
19 |   fastify.register(sensible);
20 |   fastify.addSchema(httpErrorSchema);
21 | });
22 | 


--------------------------------------------------------------------------------
/src/indexer/src/routes/README.md:
--------------------------------------------------------------------------------
 1 | # Routes Folder
 2 | 
 3 | Routes define endpoints within your application. Fastify provides an
 4 | easy path to a microservice architecture; in the future you might want
 5 | to independently deploy some of these routes.
 6 | 
 7 | In this folder you should define all the routes that define the endpoints
 8 | of your web application.
 9 | Each service is a [Fastify
10 | plugin](https://www.fastify.io/docs/latest/Reference/Plugins/), it is
11 | encapsulated (it can have its own independent plugins) and it is
12 | typically stored in a file; be careful to group your routes logically,
13 | e.g. all `/users` routes in a `users.js` file. We have added
14 | a `root.js` file for you with a '/' root added.
15 | 
16 | If a single file becomes too large, create a folder and add an `index.js` file there:
17 | this file must be a Fastify plugin, and it will be loaded automatically
18 | by the application. You can now add as many files as you want inside that folder.
19 | In this way you can create complex routes within a single monolith,
20 | and eventually extract them.
21 | 
22 | If you need to share functionality between routes, place that
23 | functionality into the `plugins` folder, and share it via
24 | [decorators](https://www.fastify.io/docs/latest/Reference/Decorators/).
25 | 


--------------------------------------------------------------------------------
/src/indexer/src/routes/indexes/index.ts:
--------------------------------------------------------------------------------
  1 | import { type FastifyPluginAsyncJsonSchemaToTs } from '@fastify/type-provider-json-schema-to-ts';
  2 | 
  3 | export interface IndexFileOptionsField { // JSON accepted in the "options" field of a file upload
  4 |   category?: string; // passed to the indexer with the file; 'default' when omitted
  5 |   wait?: boolean; // when truthy, the file is indexed before the response is sent
  6 |   useVectors?: boolean; // forwarded in the indexFile options; true when omitted
  7 | }
  8 | 
  9 | const root: FastifyPluginAsyncJsonSchemaToTs = async (fastify, _options): Promise<void> => {
 10 |   // Creates the search index named in the request body
 11 |   fastify.post('/', {
 12 |     schema: {
 13 |       description: 'Create a new search index',
 14 |       tags: ['indexes'],
 15 |       body: {
 16 |         type: 'object',
 17 |         properties: {
 18 |           name: { type: 'string' },
 19 |         },
 20 |         required: ['name'],
 21 |       },
 22 |       response: {
 23 |         204: {
 24 |           description: 'Successfully created index',
 25 |           type: 'null',
 26 |         },
 27 |         400: { $ref: 'httpError' },
 28 |         500: { $ref: 'httpError' },
 29 |       },
 30 |     } as const,
 31 |     async handler(request, reply) {
 32 |       const indexName = request.body.name;
 33 |       try {
 34 |         await fastify.indexer.createSearchIndex(indexName);
 35 |         reply.code(204);
 36 |       } catch (error_: unknown) {
 37 |         // Any failure is logged and reported to the client as a 500
 38 |         const failure = error_ as Error;
 39 |         fastify.log.error(failure);
 40 |         reply.internalServerError(`Unknown server error: ${failure.message}`);
 41 |       }
 42 |     },
 43 |   });
 44 | 
 45 |   // Deletes the search index named in the URL
 46 |   fastify.delete('/:name', {
 47 |     schema: {
 48 |       description: 'Delete a search index',
 49 |       tags: ['indexes'],
 50 |       params: {
 51 |         type: 'object',
 52 |         properties: {
 53 |           name: { type: 'string' },
 54 |         },
 55 |         required: ['name'],
 56 |       },
 57 |       response: {
 58 |         204: {
 59 |           description: 'Successfully deleted index',
 60 |           type: 'null',
 61 |         },
 62 |         500: { $ref: 'httpError' },
 63 |       },
 64 |     } as const,
 65 |     async handler(request, reply) {
 66 |       const indexName = request.params.name;
 67 |       try {
 68 |         await fastify.indexer.deleteSearchIndex(indexName);
 69 |         reply.code(204);
 70 |       } catch (error_: unknown) {
 71 |         // Any failure is logged and reported to the client as a 500
 72 |         const failure = error_ as Error;
 73 |         fastify.log.error(failure);
 74 |         reply.internalServerError(`Unknown server error: ${failure.message}`);
 75 |       }
 76 |     },
 77 |   });
 78 | 
 79 |   // Uploads one file for indexing. The optional "options" field holds a JSON-encoded
 80 |   // IndexFileOptionsField: with "wait" the reply (204) is sent after indexing, otherwise 202 is sent first.
 81 |   fastify.post('/:name/files', {
 82 |     schema: {
 83 |       description: 'Upload a file for indexing',
 84 |       tags: ['indexes'],
 85 |       consumes: ['multipart/form-data'],
 86 |       params: {
 87 |         type: 'object',
 88 |         properties: { name: { type: 'string' } },
 89 |         required: ['name'],
 90 |       },
 91 |       body: {
 92 |         type: 'object',
 93 |         properties: {
 94 |           options: { $ref: 'multipartField' },
 95 |           // TODO: missing proper file type from ajv plugin
 96 |           file: { $ref: 'multipartField' },
 97 |         },
 98 |         required: ['file'],
 99 |       },
100 |       response: {
101 |         202: { description: 'File indexing started', type: 'null' },
102 |         204: { description: 'File indexing completed', type: 'null' },
103 |         400: { $ref: 'httpError' },
104 |         500: { $ref: 'httpError' },
105 |       },
106 |     } as const,
107 |     handler: async function (request, reply) {
108 |       // TOFIX: issue in types generation
109 |       // https://github.com/fastify/fastify-type-provider-json-schema-to-ts/issues/57
110 |       const { file, options } = (request as any).body;
111 |       if (file.type !== 'file') {
112 |         return reply.badRequest('field "file" must be a file');
113 |       }
114 |       if (options && options.type !== 'field') {
115 |         return reply.badRequest('field "options" must be a value');
116 |       }
117 |       let fileOptions: IndexFileOptionsField;
118 |       try {
119 |         fileOptions = JSON.parse(options?.value ?? '{}') as IndexFileOptionsField;
120 |       } catch {
121 |         // Malformed options are a client error, not a server failure
122 |         return reply.badRequest('field "options" must be valid JSON');
123 |       }
124 | 
125 |       try {
126 |         fastify.log.info(`Received indexing options: ${JSON.stringify(fileOptions)}`);
127 |         const filesInfos = {
128 |           filename: file.filename,
129 |           data: await file.toBuffer(),
130 |           type: file.mimetype,
131 |           category: fileOptions?.category ?? 'default',
132 |         };
133 |         const useVectors = fileOptions?.useVectors ?? true;
134 |         if (fileOptions?.wait) {
135 |           fastify.log.info(`Indexing file "${filesInfos.filename}" synchronously`);
136 |           await fastify.indexer.indexFile(request.params.name, filesInfos, {
137 |             useVectors,
138 |             throwErrors: true,
139 |           });
140 |           reply.code(204);
141 |         } else {
142 |           // Do not await this, we want to return 202 immediately. The requested useVectors
143 |           // value is honored here too, and a late rejection is logged instead of left unhandled.
144 |           const background = fastify.indexer.indexFile(request.params.name, filesInfos, { useVectors });
145 |           background.catch((error: unknown) => fastify.log.error(error as Error));
146 |           reply.code(202);
147 |         }
148 |       } catch (_error: unknown) {
149 |         const error = _error as Error;
150 |         fastify.log.error(error);
151 |         reply.internalServerError(`Unknown server error: ${error.message}`);
152 |       }
153 |     },
154 |   });
155 | 
156 |   // Removes the sections of one file from the given index
157 |   fastify.delete('/:name/files/:filename', {
158 |     schema: {
159 |       description: 'Delete a file from the index',
160 |       tags: ['indexes'],
161 |       params: {
162 |         type: 'object',
163 |         properties: {
164 |           name: {
165 |             type: 'string',
166 |           },
167 |           filename: { type: 'string' },
168 |         },
169 |         required: ['name', 'filename'],
170 |       },
171 |       response: {
172 |         204: {
173 |           description: 'Successfully deleted file',
174 |           type: 'null',
175 |         },
176 |         500: { $ref: 'httpError' },
177 |       },
178 |     } as const,
179 |     async handler(request, reply) {
180 |       const { params } = request;
181 |       try {
182 |         await fastify.indexer.deleteFromIndex(params.name, params.filename);
183 |         reply.code(204);
184 |       } catch (error_: unknown) {
185 |         // Any failure is logged and reported to the client as a 500
186 |         const failure = error_ as Error;
187 |         fastify.log.error(failure);
188 |         reply.internalServerError(`Unknown server error: ${failure.message}`);
189 |       }
190 |     },
191 |   });
192 | };
193 | 
194 | export default root;
195 | 


--------------------------------------------------------------------------------
/src/indexer/src/routes/root.ts:
--------------------------------------------------------------------------------
 1 | import fs from 'node:fs/promises';
 2 | import path from 'node:path';
 3 | import { fileURLToPath } from 'node:url';
 4 | import { type FastifyPluginAsync } from 'fastify';
 5 | 
 6 | const __dirname = path.dirname(fileURLToPath(import.meta.url));
 7 | 
 8 | const root: FastifyPluginAsync = async (fastify, _options): Promise<void> => {
 9 |   // GET / describes the service using fields read from package.json on each request
10 |   fastify.get('/', async function (_request, _reply) {
11 |     const manifestPath = path.join(__dirname, '../../package.json');
12 |     const manifest = await fs.readFile(manifestPath, 'utf8');
13 |     const { name, description, version } = JSON.parse(manifest);
14 | 
15 |     return { service: name, description, version };
16 |   });
17 | };
18 | 
19 | export default root;
20 | 


--------------------------------------------------------------------------------
/src/indexer/test.http:
--------------------------------------------------------------------------------
 1 | ##################################################################
 2 | # VS Code with REST Client extension is needed to use this file.
 3 | # Download at: https://aka.ms/vscode/rest-client
 4 | ##################################################################
 5 | 
 6 | @api_host = http://localhost:3001
 7 | 
 8 | # Create an index
 9 | POST {{api_host}}/indexes
10 | Content-Type: application/json
11 | 
12 | {
13 |   "name": "test"
14 | }
15 | 
16 | ###
17 | 
18 | # Delete an index
19 | DELETE {{api_host}}/indexes/test
20 | 
21 | ###
22 | 
23 | # Index a text file
24 | POST {{api_host}}/indexes/test/files
25 | Accept: */*
26 | Content-Type: multipart/form-data; boundary=Boundary
27 | 
28 | --Boundary
29 | Content-Disposition: form-data; name="file"; filename="readme.md"
30 | Content-Type: text/markdown
31 | 
32 | < ../../README.md
33 | --Boundary
34 | Content-Disposition: form-data; name="options"
35 | 
36 | {
37 |   "category": "test-category",
38 |   "wait": true,
39 |   "useVectors": true
40 | }
41 | --Boundary--
42 | 
43 | ###
44 | 
45 | # Index a pdf file
46 | POST {{api_host}}/indexes/test/files
47 | Accept: */*
48 | Content-Type: multipart/form-data; boundary=Boundary
49 | 
50 | --Boundary
51 | Content-Disposition: form-data; name="file"; filename="test.pdf"
52 | Content-Type: application/pdf
53 | 
54 | < ../../data/support.pdf
55 | --Boundary
56 | Content-Disposition: form-data; name="options"
57 | 
58 | {
59 |   "category": "test-category",
60 |   "wait": true,
61 |   "useVectors": true
62 | }
63 | --Boundary--
64 | 
65 | ###
66 | 
67 | # Delete a file
68 | DELETE {{api_host}}/indexes/test/files/readme.md
69 | 


--------------------------------------------------------------------------------
/src/indexer/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "fastify-tsconfig",
 3 |   "compilerOptions": {
 4 |     "outDir": "dist",
 5 |     "module": "esnext",
 6 |     "moduleResolution": "node",
 7 |     "sourceMap": true,
 8 |     "esModuleInterop": true,
 9 |     "lib": ["esnext"]
10 |   },
11 |   "include": ["src/**/*.ts"]
12 | }
13 | 


--------------------------------------------------------------------------------