├── .devcontainer └── devcontainer.json ├── .gitignore ├── LICENSE ├── README.md ├── app ├── backend │ ├── Dockerfile │ ├── app.py │ └── requirements.txt └── frontend │ ├── Dockerfile │ ├── app.py │ └── requirements.txt ├── assets ├── azure-infra-architecture.png ├── deployment-step-1.png ├── deployment-step-2.png ├── deployment-step-3.png ├── deployment-step-4.png └── sentiment-analysis-demo.gif ├── azure.yaml ├── data └── feedback_events.json ├── infra ├── abbreviations.json ├── core │ ├── ai │ │ └── cognitiveservices.bicep │ ├── data │ │ └── event-hubs.bicep │ ├── host │ │ ├── container-app.bicep │ │ ├── container-apps-environment.bicep │ │ ├── container-apps.bicep │ │ └── container-registry.bicep │ ├── monitor │ │ ├── applicationinsights.bicep │ │ ├── loganalytics.bicep │ │ └── monitoring.bicep │ └── security │ │ └── role.bicep ├── main.bicep └── main.parameters.json ├── requirements_dev.txt └── scripts ├── load_env.ps1 └── load_env.sh /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Data streaming for AI app Demo", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.11", 4 | "features": { 5 | "ghcr.io/devcontainers/features/powershell:1.1.0": {}, 6 | "ghcr.io/devcontainers/features/azure-cli:1.0.8": {}, 7 | "ghcr.io/azure/azure-dev/azd:latest": {} 8 | }, 9 | "customizations": { 10 | "vscode": { 11 | "extensions": [ 12 | "ms-azuretools.azure-dev", 13 | "ms-azuretools.vscode-bicep", 14 | "ms-azuretools.vscode-docker", 15 | "ms-python.python" 16 | ] 17 | } 18 | }, 19 | "postAttachCommand": "", 20 | "remoteUser": "vscode", 21 | "hostRequirements": { 22 | "memory": "8gb", 23 | "cpus": 2 24 | } 25 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Azure az webapp deployment details 2 | .azure 3 | *_env 4 | 5 | # Byte-compiled / optimized / DLL 
files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # Cython debug symbols 142 | cython_debug/ 143 | 144 | # NPM 145 | npm-debug.log* 146 | node_modules 147 | static/ 148 | 149 | data/*.md5 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 pathway-labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data streaming for real-time enterprise AI apps 2 | 3 | This repository demonstrates how to build real-time generative AI applications using [Azure Event Hubs](https://learn.microsoft.com/en-us/azure/event-hubs/azure-event-hubs-kafka-overview) + [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) + [Pathway](https://pathway.com/)’s [LLM App](https://github.com/pathwaycom/llm-app)+[Streamlit](https://streamlit.io/). 4 | 5 | - [Data streaming for real-time enterprise AI apps](#data-streaming-for-real-time-enterprise-ai-apps) 6 | - [Motivation](#motivation) 7 | - [Example scenario: Customer support and sentiment analysis dashboard](#example-scenario-customer-support-and-sentiment-analysis-dashboard) 8 | - [Background](#background) 9 | - [Implementation](#implementation) 10 | - [Overview of the Azure services the sample project uses](#overview-of-the-azure-services-the-sample-project-uses) 11 | - [Azure infrastructure with the main components](#azure-infrastructure-with-the-main-components) 12 | - [One click running app demo](#one-click-running-app-demo) 13 | - [Setup the project](#setup-the-project) 14 | - [Prerequisites](#prerequisites) 15 | - [Open in GitHub Codespaces](#open-in-github-codespaces) 16 | - [Open in Dev Container](#open-in-dev-container) 17 | - [Local environment](#local-environment) 18 | - [Run the project locally](#run-the-project-locally) 19 | - [Deploy from scratch](#deploy-from-scratch) 20 | - [Deploy with existing Azure 
resources](#deploy-with-existing-azure-resources) 21 | - [Existing Azure resource group](#existing-azure-resource-group) 22 | - [Existing Azure OpenAI resource](#existing-azure-openai-resource) 23 | 24 | ## Motivation 25 | 26 | A real-time AI app needs real-time data to respond with the most up-to-date information to user queries or perform quick actions autonomously. To reduce cost and infrastructural complexity, you can build a real-time data pipeline with Azure Event Hubs, Pathway, and Azure OpenAI. This integrated system leverages the strengths of Pathway for robust data processing, LLMs like GPT for advanced text analytics, and Streamlit for user-friendly data visualization. 27 | 28 | This combination empowers businesses to build and deploy enterprise AI applications that provide the freshest contextual visual data. 29 | 30 | ## Example scenario: Customer support and sentiment analysis dashboard 31 | 32 | ### Background 33 | 34 | For example, a multinational corporation wants to improve its customer support by analyzing customer feedback and inquiries in real time. They aim to understand common issues, track customer sentiment, and identify areas for improvement in their products and services. To achieve this, they need a system that can process large data streams, analyze text for insights, and present these insights in an accessible way. 35 | 36 | ### Implementation 37 | 38 | 1. **Azure Event Hubs & Kafka: Real-Time Data Streaming and Processing** 39 | 40 | Azure Event Hubs collects real-time data from various sources, such as customer feedback forms, support chat logs, and social media mentions. This data is then streamed into a Kafka cluster for further processing. 41 | 42 | 2. **Large Language Models (LLMs) like GPT from Azure OpenAI: Text Analysis and Sentiment Detection** 43 | 44 | The text data from Kafka is fed into an LLM for natural language processing using Pathway. 
This model performs sentiment analysis, key phrase extraction, and feedback categorization (e.g., identifying common issues or topics). 45 | 46 | 3. **Pathway to enable real-time data pipeline** 47 | 48 | Pathway gains access to the data streams from Azure Event Hubs and preprocesses, transforms, or joins them. The [LLM App](https://github.com/pathwaycom/llm-app) then helps to bring real-time context to the AI app with real-time vector indexing, semantic search, and retrieval capabilities. The text content of the events is sent to the Azure OpenAI embedding APIs via the LLM App to compute the embeddings, and the resulting vector representations are indexed. 49 | 50 | Using the LLM App, the company can gain deep insights from unstructured text data, understanding the sentiment and nuances of customer feedback. 51 | 52 | 4. **Streamlit: Interactive Dashboard for Visualization** 53 | 54 | Streamlit is used to create an interactive web dashboard that visualizes the insights derived from customer feedback. This dashboard can show real-time metrics such as overall sentiment trends and common topics in customer feedback, and even alert the team to emerging issues (see the [example](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/drive_alert) implementation of alerting to enhance this project). 55 | 56 | ### Overview of the Azure services the sample project uses 57 | 58 | | Service | Purpose | 59 | | --- | --- | 60 | | [Azure AI Services](https://azure.microsoft.com/en-us/products/ai-services?activetab=pivot:azureopenaiservicetab) | To use Azure OpenAI GPT model and embeddings. | 61 | | [Azure Event Hubs](https://azure.microsoft.com/en-us/products/event-hubs) | To stream real-time events from various data sources. | 62 | | [Azure Container Apps](https://learn.microsoft.com/azure/container-apps/?WT.mc_id=javascript-0000-cxa) | Hosts our containerized applications (backend and frontend) with features like auto-scaling and load balancing. 
| 63 | | [Azure Container Registry](https://learn.microsoft.com/azure/container-registry/?WT.mc_id=javascript-0000-cxa) | Stores our Docker container images in a managed, private registry. | 64 | | [Azure Log Analytics](https://learn.microsoft.com/azure/log-analytics/?WT.mc_id=javascript-0000-cxa) | Collects and analyzes telemetry and logs for insights into application performance and diagnostics. | 65 | | [Azure Monitor](https://learn.microsoft.com/azure/azure-monitor/?WT.mc_id=javascript-0000-cxa) | Provides comprehensive monitoring of our applications, infrastructure, and network. | 66 | 67 | ### Azure infrastructure with the main components 68 | 69 | ![Azure Infrastructure Diagram](/assets/azure-infra-architecture.png) 70 | 71 | ### One click running app demo 72 | 73 | Follow the link to see the running UI app in Azure: 74 | 75 | [Customer support and sentiment analysis dashboard](https://frontend.greensmoke-e214d1a7.francecentral.azurecontainerapps.io/) 76 | 77 | It builds a real-time dashboard based on an example prompt we provided to analyze the data. 78 | 79 | ## Setup the project 80 | 81 | To set up the project you need to follow the below steps: 82 | 83 | 1. You have an Azure account with the required settings specified in the [Prerequisites](#prerequisites) section. 84 | 2. Choose one of these environments to open the project: 85 | 1. [GitHub Codespaces](#open-in-github-codespaces). 86 | 2. [VS Code Dev Containers](#open-in-dev-container). 87 | 3. [Local environment](#local-environment). 88 | 3. Follow the [deploy from scratch](#deploy-from-scratch) or [deploy with existing Azure resources](#deploy-with-existing-azure-resources) guide. 89 | 90 | ### Prerequisites 91 | 92 | **Azure account requirements:** To run and deploy the example project, you'll need: 93 | 94 | - **Azure account**. If you're new to Azure, [get an Azure account for free](https://azure.microsoft.com/free/cognitive-search/) and you'll get some free Azure credits to get started. 
95 | - **Azure subscription with access enabled for the Azure OpenAI service**. You can apply for access to Azure OpenAI by completing the form at https://aka.ms/oai/access. 96 | 97 | ### Open in GitHub Codespaces 98 | 99 | Follow these steps to open the project in a Codespace: 100 | 101 | 1. Click here to open in GitHub Codespaces 102 | 103 | [![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=lightgrey&logo=github)](https://codespaces.new/pathway-labs/azure-openai-real-time-data-app) 104 | 105 | 2. Next -> [deploy from scratch](#deploy-from-scratch) or [deploy with existing Azure resources](#deploy-with-existing-azure-resources). 106 | 107 | ### Open in Dev Container 108 | 109 | 1. Click here to open in Dev Container 110 | 111 | [![Open in Dev Container](https://img.shields.io/static/v1?style=for-the-badge&label=Dev+Container&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/pathway-labs/azure-openai-real-time-data-app) 112 | 113 | 2. Next -> [deploy from scratch](#deploy-from-scratch) or [deploy with existing Azure resources](#deploy-with-existing-azure-resources). 114 | 115 | ### Local environment 116 | 117 | First, install the required tools: 118 | 119 | - [Azure Developer CLI](https://aka.ms/azure-dev/install) 120 | - [Python 3.9, 3.10, or 3.11](https://www.python.org/downloads/) 121 | - **Important**: Ensure you can run `python --version` from the console. On Ubuntu, you might need to run `sudo apt install python-is-python3` to link `python` to `python3`. 122 | - [Git](https://git-scm.com/downloads) 123 | - [Install WSL](https://learn.microsoft.com/en-us/windows/wsl/install) - For Windows users only. 124 | - [Powershell 7+ (pwsh)](https://github.com/powershell/powershell) - For Windows users only. 125 | - **Important**: Ensure you can run `pwsh.exe` from a PowerShell terminal. 
If this fails, you likely need to upgrade PowerShell. 126 | 127 | Then bring down the project code: 128 | 129 | 1. Open a terminal. 130 | 2. Run `azd auth login` and log in using your Azure account credentials. 131 | 3. Run `azd init -t https://github.com/pathway-labs/azure-openai-real-time-data-app`. This command initializes a git repository, so you do not need to clone this repository. 132 | 4. When the project starts, the system prompts you to enter a new environment name: `AZURE_ENV_NAME`. Read more about how to [manage environment variables](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/manage-environment-variables). For example, if you enter the name *pathway*, the outputs of infrastructure provisioning are automatically stored as environment variables in a `.env` file located at `.azure/pathway/.env` in the project folder. 133 | 5. Then, follow the [deploy from scratch](#deploy-from-scratch) guide. 134 | 135 | ### Run the project locally 136 | 137 | 1. Open the project in [GitHub Codespaces](#open-in-github-codespaces), [VS Code Dev Containers](#open-in-dev-container), or [Local environment](#local-environment). 138 | 2. [Deploy from scratch](#deploy-from-scratch) or [deploy with existing Azure resources](#deploy-with-existing-azure-resources). 139 | 3. Copy the `.env` file located at `.azure/{environment name}/.env` to a new `.env` file in the project root folder, where the `README.md` file is. 140 | 4. Install the required packages: 141 | 142 | ```bash 143 | pip install --upgrade -r requirements_dev.txt 144 | ``` 145 | 5. Navigate to the `app/frontend` folder: `cd app/frontend`. 146 | 6. Run the UI app with the `streamlit run app.py` command. The frontend app automatically uses the backend API deployed in Azure. 147 | 148 | ### Deploy from scratch 149 | 150 | If you don't have any pre-existing Azure services and want to start from a fresh deployment, execute the following commands. 151 | 152 | 1. Open a terminal. 153 | 2. 
Run `azd up` - This will provision Azure resources and deploy the sample project to those resources. We're using **[Bicep](https://learn.microsoft.com/azure/azure-resource-manager/bicep/overview?tabs=bicep&WT.mc_id=javascript-0000-cxa)**, a language that simplifies the definition of ARM templates and configuring Azure resources. 154 | 3. Leave `EVENT_HUBS_NAMESPACE_CONNECTION_STRING` and `AZURE_OPENAI_API_KEY` empty for now. We will assign them later, after the first successful deployment. 155 | 156 | ![Deployment step 1](/assets/deployment-step-1.png) 157 | 158 | ![Deployment step 2](/assets/deployment-step-2.png) 159 | 160 | After the application has been successfully deployed, you will see URLs for both the backend and frontend apps printed to the console. 161 | 162 | ![Deployment step 3](/assets/deployment-step-3.png) 163 | 164 | > NOTE: It may take 5-10 minutes for the application to be fully deployed. 165 | 166 | 4. After the first deployment, set the environment variable values for `EVENT_HUBS_NAMESPACE_CONNECTION_STRING` and `AZURE_OPENAI_API_KEY` by running the commands below. See how to retrieve the [Azure OpenAI API Key](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=programming-language-python#retrieve-key-and-endpoint) and the [Event Hubs connection string](https://learn.microsoft.com/en-us/azure/event-hubs/event-hubs-get-connection-string). You can also manually set these values from the Azure portal and skip step 5. 167 | 168 | ```bash 169 | azd env set AZURE_OPENAI_API_KEY {Azure OpenAI API Key} 170 | 171 | azd env set EVENT_HUBS_NAMESPACE_CONNECTION_STRING {Azure Event Hubs Namespace Connection String} 172 | ``` 173 | 174 | 5. Run `azd deploy` to update these values in the Azure Container App. The Pathway LLM App backend uses these environment variables. Other variables will be filled automatically. 175 | 176 | ![Deployment step 4](/assets/deployment-step-4.png) 177 | 178 | 6. 
Follow the generated link in the terminal for the frontend app in the Azure Container app and start to use the app. App ingests data from Azure event hubs. Learn how to send events using [Azure Event Hubs Data Generator](https://learn.microsoft.com/en-us/azure/event-hubs/send-and-receive-events-using-data-generator). 179 | 180 | ![Customer support and sentiment analysis dashboard](/assets/sentiment-analysis-demo.gif) 181 | 182 | 183 | ### Deploy with existing Azure resources 184 | 185 | If you already have existing Azure resources, you can re-use those by setting `azd` environment values. 186 | 187 | #### Existing Azure resource group 188 | 189 | 1. Run `azd env set AZURE_RESOURCE_GROUP {Name of existing resource group}` 190 | 2. Run `azd env set AZURE_LOCATION {Location of existing resource group}` 191 | 192 | #### Existing Azure OpenAI resource 193 | 194 | 1. Run `azd env set AZURE_OPENAI_SERVICE {Name of existing OpenAI service}` 195 | 2. Run `azd env set AZURE_OPENAI_RESOURCE_GROUP {Name of existing resource group that OpenAI service is provisioned to}` 196 | 3. Run `azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT {Name of existing ChatGPT deployment}`. Only needed if your ChatGPT deployment is not the default 'chat'. 197 | 4. Run `azd env set AZURE_OPENAI_EMB_DEPLOYMENT {Name of existing GPT embedding deployment}`. Only needed if your embedding deployment is not the default 'embedding'. 198 | 199 | When you run `azd up` after and are prompted to select a value for `openAiResourceGroupLocation`, make sure to select the same location as the existing OpenAI resource group. 200 | -------------------------------------------------------------------------------- /app/backend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | COPY . . 
4 | 5 | RUN pip install --upgrade -r requirements.txt 6 | 7 | CMD ["sh", "-c", "python app.py --host 0.0.0.0 --port ${PORT:-${WEBSITES_PORT:-8080}}"] -------------------------------------------------------------------------------- /app/backend/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pathway as pw 4 | from pathway.stdlib.ml.index import KNNIndex 5 | 6 | from llm_app.model_wrappers import OpenAIChatGPTModel, OpenAIEmbeddingModel 7 | 8 | from dotenv import load_dotenv 9 | 10 | # Load environment variables 11 | load_dotenv() 12 | 13 | # Set Azure OpenAI configs 14 | service_name = os.environ["AZURE_OPENAI_SERVICE"] 15 | api_base = f"https://{service_name}.openai.azure.com" 16 | api_type = "azure" 17 | api_key = os.environ["AZURE_OPENAI_API_KEY"] 18 | api_version = '2023-05-15' 19 | model_locator = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"] 20 | embedder_locator = os.environ["AZURE_OPENAI_EMB_DEPLOYMENT"] 21 | embedding_dimension = int(os.environ.get("EMBEDDING_DIMENSION", 1536)) 22 | max_tokens = int(os.environ.get("AZURE_OPENAI_MAX_TOKENS", 500)) 23 | temperature = float(os.environ.get("AZURE_OPENAI_TEMPERATURE", 0.0)) 24 | 25 | 26 | # Set Azure Event Hubs credentials 27 | event_hubs_connection_string = os.environ["EVENT_HUBS_NAMESPACE_CONNECTION_STRING"] 28 | 29 | # Define Kafka cluster settings. NOTE(review): the Event Hubs namespace host is hard-coded and TLS certificate verification is disabled below; confirm this is intended outside a demo. 30 | rdkafka_settings = { 31 | "bootstrap.servers": "eventhubpathwayns.servicebus.windows.net:9093", 32 | "security.protocol": "SASL_SSL", 33 | "sasl.mechanism": "PLAIN", 34 | "group.id": "$GROUP_NAME", 35 | "session.timeout.ms": "60000", 36 | "sasl.username": "$ConnectionString", 37 | "sasl.password": event_hubs_connection_string, 38 | "enable.ssl.certificate.verification": "false" 39 | } 40 | 41 | 42 | def run( 43 | *, 44 | host: str = "0.0.0.0", 45 | port: int = 8080 46 | ): 47 | # Real-time data coming from the Kafka topic 48 | topic_data = pw.io.kafka.read( 49 | rdkafka_settings, 50 | 
topic="eventhubpathway", 51 | format="raw", 52 | autocommit_duration_ms=1000, 53 | ) 54 | 55 | # Transform the raw event data into a structured document 56 | transformed_topic_data = transform(topic_data) 57 | 58 | # Compute embeddings for each Kafka event using the OpenAI Embeddings API 59 | embedded_topic_data = embeddings(context=transformed_topic_data, 60 | data_to_embed=transformed_topic_data.doc) 61 | 62 | # Construct an index on the generated embeddings in real-time 63 | index = index_embeddings(embedded_topic_data) 64 | 65 | # Accept user questions as queries through the HTTP REST connector 66 | query, response_writer = pw.io.http.rest_connector( 67 | host=host, 68 | port=port, 69 | schema=QueryInputSchema, 70 | autocommit_duration_ms=50, 71 | ) 72 | 73 | # Generate embeddings for the query from the OpenAI Embeddings API 74 | embedded_query = embeddings(context=query, data_to_embed=pw.this.query) 75 | 76 | # Build prompt using indexed data 77 | responses = prompt(index, embedded_query, pw.this.query) 78 | 79 | # Hand the ChatGPT-generated answers to the REST connector's response writer. 
80 | response_writer(responses) 81 | 82 | pw.run() 83 | 84 | 85 | def concat_with_titles(**kwargs) -> str: 86 | combined = [f"{title}: {value}" for title, value in kwargs.items()] 87 | return ', '.join(combined) 88 | 89 | 90 | def transform(data): 91 | return data.select( 92 | doc=pw.apply(concat_with_titles, **data), 93 | ) 94 | 95 | 96 | def embeddings(context, data_to_embed): 97 | return context + context.select(vector=openai_embedder(data_to_embed)) 98 | 99 | 100 | def index_embeddings(embedded_data): 101 | return KNNIndex(embedded_data.vector, embedded_data, n_dimensions=embedding_dimension) 102 | 103 | 104 | def prompt(index, embedded_query, user_query): 105 | 106 | @pw.udf 107 | def build_prompt(local_indexed_data, query): 108 | docs_str = "\n".join(local_indexed_data) 109 | prompt = f"Given the following data: \n {docs_str} \nanswer this query: {query}" 110 | return prompt 111 | 112 | query_context = embedded_query + index.get_nearest_items( 113 | embedded_query.vector, k=3, collapse_rows=True 114 | ).select(local_indexed_data_list=pw.this.doc).promise_universe_is_equal_to(embedded_query) 115 | 116 | prompt = query_context.select( 117 | prompt=build_prompt(pw.this.local_indexed_data_list, user_query) 118 | ) 119 | 120 | return prompt.select( 121 | query_id=pw.this.id, 122 | result=openai_chat_completion(pw.this.prompt), 123 | ) 124 | 125 | 126 | def openai_embedder(data): 127 | embedder = OpenAIEmbeddingModel(api_key=api_key, 128 | api_type=api_type, 129 | api_base=api_base, 130 | api_version=api_version) 131 | 132 | return embedder.apply(text=data, locator=embedder_locator) 133 | 134 | 135 | def openai_chat_completion(prompt): 136 | model = OpenAIChatGPTModel(api_key=api_key, 137 | api_type=api_type, 138 | api_base=api_base, 139 | api_version=api_version) 140 | 141 | return model.apply( 142 | prompt, 143 | locator=model_locator, 144 | temperature=temperature, 145 | max_tokens=max_tokens, 146 | ) 147 | 148 | 149 | # User API query schema 150 | class 
QueryInputSchema(pw.Schema): 151 | query: str 152 | user: str 153 | 154 | 155 | if __name__ == "__main__": 156 | run() 157 | -------------------------------------------------------------------------------- /app/backend/requirements.txt: -------------------------------------------------------------------------------- 1 | pathway 2 | llm_app 3 | requests 4 | datetime 5 | python-dotenv 6 | kafka -------------------------------------------------------------------------------- /app/frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | COPY . /app 4 | 5 | WORKDIR /app 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | build-essential \ 9 | curl \ 10 | software-properties-common \ 11 | git \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN pip install -r requirements.txt 15 | 16 | EXPOSE 8501 17 | 18 | HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health 19 | 20 | ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] -------------------------------------------------------------------------------- /app/frontend/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import streamlit as st 4 | from dotenv import load_dotenv 5 | import pandas as pd 6 | import json 7 | import time 8 | import plotly.express as px 9 | 10 | load_dotenv() 11 | api_host = os.environ.get("BACKEND_API_URI", "http://127.0.0.1") 12 | 13 | # Streamlit UI elements 14 | st.title("Customer support and sentiment analysis dashboard") 15 | 16 | st.subheader("Example prompt") 17 | 18 | default_prompt = "Provide overall sentiment trends, and common topics and rating over time and sources with counts based on last feedback events and respond only in json without explanation and new line. 
Follow this json structure and replace values accordingly: {\"sentiment_trends\": {\"positive\": 3, \"negative\": 1, \"neutral\": 1}, \"common_topics\": [{\"topic\": \"customer service\", \"count\": 10}, {\"topic\": \"product quality\", \"count\": 7}], \"rating_over_time\": [{\"date\": \"2021-01-01\", \"rating\": 4.5}, {\"date\": \"2021-01-02\", \"rating\": 3.8}], \"common_sources\": [{\"source\": \"Online Survey\", \"count\": 20}, {\"source\": \"Customer Feedback Form\", \"count\": 15}]}" 19 | 20 | st.text("Provide overall sentiment trends, and common topics and rating over time\n and sources with counts based on last feedback events") 21 | 22 | url = f"{api_host}" 23 | data = {"query": default_prompt, "user": "user"} 24 | 25 | response = requests.post(url, json=data) 26 | 27 | if response.status_code == 200: 28 | data_response = response.json() 29 | json_data = json.loads(data_response) 30 | 31 | # Sentiment Trends 32 | sentiment_df = pd.DataFrame(list(json_data["sentiment_trends"].items()), columns=['Sentiment', 'Count']) 33 | color_map = {"positive": "green", "negative": "red", "neutral": "blue"} 34 | fig_sentiment = px.bar(sentiment_df, x='Sentiment', y='Count', title="Sentiment Trends", color='Sentiment', color_discrete_map=color_map) 35 | 36 | # Rating Over Time 37 | # rating_data = [] 38 | rating_data = json_data["rating_over_time"] 39 | rating_df = pd.DataFrame(rating_data) 40 | rating_df['Date'] = pd.to_datetime(rating_df['date']) 41 | fig_rating = px.line(rating_df, x='Date', y='rating', title="Average Rating Over Time", markers=True) 42 | 43 | # Render the sentiment and rating charts 44 | st.plotly_chart(fig_sentiment, use_container_width=True) 45 | 46 | st.plotly_chart(fig_rating, use_container_width=True) 47 | 48 | # Convert the topic counts to a DataFrame for visualization 49 | sources_df = pd.DataFrame(json_data["common_topics"], columns=['topic', 'count']) 50 | fig_sources = px.bar(sources_df, x='topic', y='count', title="Common Topics") 51 | 
st.plotly_chart(fig_sources, use_container_width=True) 52 | 53 | sources_df = pd.DataFrame(json_data["common_sources"], columns=['source', 'count']) 54 | fig_sources = px.bar(sources_df, x='source', y='count', title="Common Sources") 55 | st.plotly_chart(fig_sources, use_container_width=True) 56 | 57 | else: 58 | st.error( 59 | f"Failed to send data to API. Status code: {response.status_code}" 60 | ) 61 | 62 | # Uncomment this to make real-time 63 | 64 | # placeholder = st.empty() 65 | 66 | 67 | # for seconds in range(200): 68 | 69 | # url = f"{api_host}" 70 | # data = {"query": default_prompt, "user": "user"} 71 | 72 | # response = requests.post(url, json=data) 73 | 74 | # if response.status_code == 200: 75 | # data_response = response.json() 76 | # json_data = json.loads(data_response) 77 | 78 | # with placeholder.container(): 79 | # # Sentiment Trends 80 | # sentiment_df = pd.DataFrame(list(json_data["sentiment_trends"].items()), columns=['Sentiment', 'Count']) 81 | # color_map = {"positive": "green", "negative": "red", "neutral": "blue"} 82 | # fig_sentiment = px.bar(sentiment_df, x='Sentiment', y='Count', title="Sentiment Trends", color='Sentiment', color_discrete_map=color_map) 83 | 84 | # # Rating Over Time 85 | # rating_data = json_data["rating_over_time"] 86 | # rating_df = pd.DataFrame(rating_data) 87 | # rating_df['Date'] = pd.to_datetime(rating_df['date']) 88 | # fig_rating = px.line(rating_df, x='Date', y='rating', title="Average Rating Over Time", markers=True) 89 | 90 | # # Streamlit layout 91 | # st.plotly_chart(fig_sentiment, use_container_width=True) 92 | 93 | # st.plotly_chart(fig_rating, use_container_width=True) 94 | 95 | # # Convert the source counts to a DataFrame for visualization 96 | # sources_df = pd.DataFrame(json_data["common_topics"], columns=['topic', 'count']) 97 | # fig_sources = px.bar(sources_df, x='topic', y='count', title="Common Topics") 98 | # st.plotly_chart(fig_sources, use_container_width=True) 99 | 100 | # sources_df = 
pd.DataFrame(json_data["common_sources"], columns=['source', 'count']) 101 | # fig_sources = px.bar(sources_df, x='source', y='count', title="Common Sources") 102 | # st.plotly_chart(fig_sources, use_container_width=True) 103 | 104 | # time.sleep(2) 105 | 106 | # else: 107 | # st.error( 108 | # f"Failed to send data to API. Status code: {response.status_code}" 109 | # ) 110 | -------------------------------------------------------------------------------- /app/frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | requests 3 | python-dotenv 4 | pandas 5 | plotly -------------------------------------------------------------------------------- /assets/azure-infra-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/azure-infra-architecture.png -------------------------------------------------------------------------------- /assets/deployment-step-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/deployment-step-1.png -------------------------------------------------------------------------------- /assets/deployment-step-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/deployment-step-2.png -------------------------------------------------------------------------------- /assets/deployment-step-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/deployment-step-3.png -------------------------------------------------------------------------------- /assets/deployment-step-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/deployment-step-4.png -------------------------------------------------------------------------------- /assets/sentiment-analysis-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathway-labs/azure-openai-real-time-data-app/2be272d39b4f4a56106c71814d5ec7625da3e1da/assets/sentiment-analysis-demo.gif -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- 1 | name: pathway-azure-openai-confluent-demo 2 | metadata: 3 | template: pathway-azure-openai-confluent-demo@0.0.2-beta 4 | services: 5 | backend: 6 | project: ./app/backend 7 | host: containerapp 8 | language: python 9 | frontend: 10 | project: ./app/frontend 11 | host: containerapp 12 | language: python -------------------------------------------------------------------------------- /data/feedback_events.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "feedback_id": "12345", 4 | "source": "email", 5 | "timestamp": "2023-11-27T10:00:00Z", 6 | "rating": 3, 7 | "text": "I found the product useful, but the setup process was complicated and frustrating." 8 | }, 9 | { 10 | "feedback_id": "12346", 11 | "source": "website", 12 | "timestamp": "2023-11-26T15:30:00Z", 13 | "rating": 5, 14 | "text": "Absolutely love this product! It has exceeded my expectations in every way." 
15 | }, 16 | { 17 | "feedback_id": "12347", 18 | "source": "email", 19 | "timestamp": "2023-11-25T09:20:00Z", 20 | "rating": 2, 21 | "text": "The product does not match the description on the website. Quite disappointed." 22 | }, 23 | { 24 | "feedback_id": "12348", 25 | "source": "social_media", 26 | "timestamp": "2023-11-24T12:45:00Z", 27 | "rating": 4, 28 | "text": "Good quality and works well, but took longer than expected to arrive." 29 | }, 30 | { 31 | "feedback_id": "12349", 32 | "source": "email", 33 | "timestamp": "2023-11-23T17:00:00Z", 34 | "rating": 1, 35 | "text": "Had a terrible experience with customer service. Not happy with the product either." 36 | }, 37 | { 38 | "feedback_id": "12350", 39 | "source": "website", 40 | "timestamp": "2023-11-22T20:30:00Z", 41 | "rating": 4, 42 | "text": "Really good product, but I wish there were more color options available." 43 | }, 44 | { 45 | "feedback_id": "12351", 46 | "source": "phone", 47 | "timestamp": "2023-11-21T11:15:00Z", 48 | "rating": 3, 49 | "text": "The product is okay, but I expected better performance for the price." 50 | }, 51 | { 52 | "feedback_id": "12352", 53 | "source": "social_media", 54 | "timestamp": "2023-11-20T14:05:00Z", 55 | "rating": 5, 56 | "text": "Fantastic! This product has made my life so much easier. Highly recommend." 57 | }, 58 | { 59 | "feedback_id": "12353", 60 | "source": "email", 61 | "timestamp": "2023-11-19T18:40:00Z", 62 | "rating": 2, 63 | "text": "The features are decent, but it's not as durable as I expected." 64 | }, 65 | { 66 | "feedback_id": "12354", 67 | "source": "website", 68 | "timestamp": "2023-11-18T16:50:00Z", 69 | "rating": 4, 70 | "text": "Great value for the price. Works well, but the user manual could be more detailed." 
71 | } 72 | ] -------------------------------------------------------------------------------- /infra/abbreviations.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysisServicesServers": "as", 3 | "apiManagementService": "apim-", 4 | "appConfigurationConfigurationStores": "appcs-", 5 | "appManagedEnvironments": "cae-", 6 | "appContainerApps": "ca-", 7 | "authorizationPolicyDefinitions": "policy-", 8 | "automationAutomationAccounts": "aa-", 9 | "blueprintBlueprints": "bp-", 10 | "blueprintBlueprintsArtifacts": "bpa-", 11 | "cacheRedis": "redis-", 12 | "cdnProfiles": "cdnp-", 13 | "cdnProfilesEndpoints": "cdne-", 14 | "cognitiveServicesAccounts": "cog-", 15 | "cognitiveServicesFormRecognizer": "cog-fr-", 16 | "cognitiveServicesTextAnalytics": "cog-ta-", 17 | "computeAvailabilitySets": "avail-", 18 | "computeCloudServices": "cld-", 19 | "computeDiskEncryptionSets": "des", 20 | "computeDisks": "disk", 21 | "computeDisksOs": "osdisk", 22 | "computeGalleries": "gal", 23 | "computeSnapshots": "snap-", 24 | "computeVirtualMachines": "vm", 25 | "computeVirtualMachineScaleSets": "vmss-", 26 | "containerInstanceContainerGroups": "ci", 27 | "containerRegistryRegistries": "cr", 28 | "containerServiceManagedClusters": "aks-", 29 | "databricksWorkspaces": "dbw-", 30 | "dataFactoryFactories": "adf-", 31 | "dataLakeAnalyticsAccounts": "dla", 32 | "dataLakeStoreAccounts": "dls", 33 | "dataMigrationServices": "dms-", 34 | "dBforMySQLServers": "mysql-", 35 | "dBforPostgreSQLServers": "psql-", 36 | "devicesIotHubs": "iot-", 37 | "devicesProvisioningServices": "provs-", 38 | "devicesProvisioningServicesCertificates": "pcert-", 39 | "documentDBDatabaseAccounts": "cosmos-", 40 | "eventGridDomains": "evgd-", 41 | "eventGridDomainsTopics": "evgt-", 42 | "eventGridEventSubscriptions": "evgs-", 43 | "eventHubNamespaces": "evhns-", 44 | "eventHubNamespacesEventHubs": "evh-", 45 | "hdInsightClustersHadoop": "hadoop-", 46 | 
"hdInsightClustersHbase": "hbase-", 47 | "hdInsightClustersKafka": "kafka-", 48 | "hdInsightClustersMl": "mls-", 49 | "hdInsightClustersSpark": "spark-", 50 | "hdInsightClustersStorm": "storm-", 51 | "hybridComputeMachines": "arcs-", 52 | "insightsActionGroups": "ag-", 53 | "insightsComponents": "appi-", 54 | "keyVaultVaults": "kv-", 55 | "kubernetesConnectedClusters": "arck", 56 | "kustoClusters": "dec", 57 | "kustoClustersDatabases": "dedb", 58 | "logicIntegrationAccounts": "ia-", 59 | "logicWorkflows": "logic-", 60 | "machineLearningServicesWorkspaces": "mlw-", 61 | "managedIdentityUserAssignedIdentities": "id-", 62 | "managementManagementGroups": "mg-", 63 | "migrateAssessmentProjects": "migr-", 64 | "networkApplicationGateways": "agw-", 65 | "networkApplicationSecurityGroups": "asg-", 66 | "networkAzureFirewalls": "afw-", 67 | "networkBastionHosts": "bas-", 68 | "networkConnections": "con-", 69 | "networkDnsZones": "dnsz-", 70 | "networkExpressRouteCircuits": "erc-", 71 | "networkFirewallPolicies": "afwp-", 72 | "networkFirewallPoliciesWebApplication": "waf", 73 | "networkFirewallPoliciesRuleGroups": "wafrg", 74 | "networkFrontDoors": "fd-", 75 | "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", 76 | "networkLoadBalancersExternal": "lbe-", 77 | "networkLoadBalancersInternal": "lbi-", 78 | "networkLoadBalancersInboundNatRules": "rule-", 79 | "networkLocalNetworkGateways": "lgw-", 80 | "networkNatGateways": "ng-", 81 | "networkNetworkInterfaces": "nic-", 82 | "networkNetworkSecurityGroups": "nsg-", 83 | "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", 84 | "networkNetworkWatchers": "nw-", 85 | "networkPrivateDnsZones": "pdnsz-", 86 | "networkPrivateLinkServices": "pl-", 87 | "networkPublicIPAddresses": "pip-", 88 | "networkPublicIPPrefixes": "ippre-", 89 | "networkRouteFilters": "rf-", 90 | "networkRouteTables": "rt-", 91 | "networkRouteTablesRoutes": "udr-", 92 | "networkTrafficManagerProfiles": "traf-", 93 | "networkVirtualNetworkGateways": 
"vgw-", 94 | "networkVirtualNetworks": "vnet-", 95 | "networkVirtualNetworksSubnets": "snet-", 96 | "networkVirtualNetworksVirtualNetworkPeerings": "peer-", 97 | "networkVirtualWans": "vwan-", 98 | "networkVpnGateways": "vpng-", 99 | "networkVpnGatewaysVpnConnections": "vcn-", 100 | "networkVpnGatewaysVpnSites": "vst-", 101 | "notificationHubsNamespaces": "ntfns-", 102 | "notificationHubsNamespacesNotificationHubs": "ntf-", 103 | "operationalInsightsWorkspaces": "log-", 104 | "portalDashboards": "dash-", 105 | "powerBIDedicatedCapacities": "pbi-", 106 | "purviewAccounts": "pview-", 107 | "recoveryServicesVaults": "rsv-", 108 | "resourcesResourceGroups": "rg-", 109 | "searchSearchServices": "srch-", 110 | "serviceBusNamespaces": "sb-", 111 | "serviceBusNamespacesQueues": "sbq-", 112 | "serviceBusNamespacesTopics": "sbt-", 113 | "serviceEndPointPolicies": "se-", 114 | "serviceFabricClusters": "sf-", 115 | "signalRServiceSignalR": "sigr", 116 | "sqlManagedInstances": "sqlmi-", 117 | "sqlServers": "sql-", 118 | "sqlServersDataWarehouse": "sqldw-", 119 | "sqlServersDatabases": "sqldb-", 120 | "sqlServersDatabasesStretch": "sqlstrdb-", 121 | "storageStorageAccounts": "st", 122 | "storageStorageAccountsVm": "stvm", 123 | "storSimpleManagers": "ssimp", 124 | "streamAnalyticsCluster": "asa-", 125 | "synapseWorkspaces": "syn", 126 | "synapseWorkspacesAnalyticsWorkspaces": "synw", 127 | "synapseWorkspacesSqlPoolsDedicated": "syndp", 128 | "synapseWorkspacesSqlPoolsSpark": "synsp", 129 | "timeSeriesInsightsEnvironments": "tsi-", 130 | "webServerFarms": "plan-", 131 | "webSitesAppService": "app-", 132 | "webSitesAppServiceEnvironment": "ase-", 133 | "webSitesFunctions": "func-", 134 | "webStaticSites": "stapp-" 135 | } 136 | -------------------------------------------------------------------------------- /infra/core/ai/cognitiveservices.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Cognitive 
Services instance.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | @description('The custom subdomain name used to access the API. Defaults to the value of the name parameter.') 6 | param customSubDomainName string = name 7 | param deployments array = [] 8 | param kind string = 'OpenAI' 9 | param publicNetworkAccess string = 'Enabled' 10 | param sku object = { 11 | name: 'S0' 12 | } 13 | 14 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { 15 | name: name 16 | location: location 17 | tags: tags 18 | kind: kind 19 | properties: { 20 | customSubDomainName: customSubDomainName 21 | publicNetworkAccess: publicNetworkAccess 22 | } 23 | sku: sku 24 | } 25 | 26 | @batchSize(1) 27 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { 28 | parent: account 29 | name: deployment.name 30 | properties: { 31 | model: deployment.model 32 | raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null 33 | } 34 | sku: contains(deployment, 'sku') ? 
deployment.sku : { 35 | name: 'Standard' 36 | capacity: 20 37 | } 38 | }] 39 | 40 | output endpoint string = account.properties.endpoint 41 | output id string = account.id 42 | output name string = account.name 43 | -------------------------------------------------------------------------------- /infra/core/data/event-hubs.bicep: -------------------------------------------------------------------------------- 1 | @description('Specifies a project name that is used to generate the Event Hub name and the Namespace name.') 2 | param eventHubProjectName string = 'eventhubpathway' 3 | 4 | @description('Specifies the Azure location for all resources.') 5 | param location string = resourceGroup().location 6 | 7 | @description('Specifies the messaging tier for Event Hub Namespace.') 8 | @allowed([ 9 | 'Basic' 10 | 'Standard' 11 | ]) 12 | param eventHubSku string = 'Standard' 13 | 14 | var eventHubNamespaceName = '${eventHubProjectName}ns' 15 | var eventHubName = eventHubProjectName 16 | 17 | resource eventHubNamespace 'Microsoft.EventHub/namespaces@2021-11-01' = { 18 | name: eventHubNamespaceName 19 | location: location 20 | sku: { 21 | name: eventHubSku 22 | tier: eventHubSku 23 | capacity: 1 24 | } 25 | properties: { 26 | isAutoInflateEnabled: false 27 | maximumThroughputUnits: 0 28 | } 29 | } 30 | 31 | resource eventHub 'Microsoft.EventHub/namespaces/eventhubs@2021-11-01' = { 32 | parent: eventHubNamespace 33 | name: eventHubName 34 | properties: { 35 | messageRetentionInDays: 7 36 | partitionCount: 1 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /infra/core/host/container-app.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param containerAppsEnvironmentName string = '' 6 | param containerName string = 'main' 7 | param containerRegistryName string = '' 8 | param 
env array = [] 9 | param secrets array = [] 10 | param external bool = true 11 | param imageName string 12 | param keyVaultName string = '' 13 | param managedIdentity bool = !empty(keyVaultName) 14 | param targetPort int = 80 15 | param allowedOrigins array = [] 16 | 17 | @description('CPU cores allocated to a single container instance, e.g. 0.5') 18 | param containerCpuCoreCount string = '0.5' 19 | 20 | @description('Memory allocated to a single container instance, e.g. 1Gi') 21 | param containerMemory string = '1.0Gi' 22 | 23 | resource app 'Microsoft.App/containerApps@2023-05-01' = { 24 | name: name 25 | location: location 26 | tags: tags 27 | identity: { type: managedIdentity ? 'SystemAssigned' : 'None' } 28 | properties: { 29 | managedEnvironmentId: containerAppsEnvironment.id 30 | configuration: { 31 | activeRevisionsMode: 'single' 32 | ingress: { 33 | external: external 34 | targetPort: targetPort 35 | transport: 'auto' 36 | corsPolicy: { 37 | allowedOrigins: empty(allowedOrigins) ? 
['*'] : allowedOrigins 38 | } 39 | } 40 | secrets: concat(secrets, [ 41 | { 42 | name: 'registry-password' 43 | value: containerRegistry.listCredentials().passwords[0].value 44 | } 45 | ]) 46 | registries: [ 47 | { 48 | server: '${containerRegistry.name}.azurecr.io' 49 | username: containerRegistry.name 50 | passwordSecretRef: 'registry-password' 51 | } 52 | ] 53 | } 54 | template: { 55 | containers: [ 56 | { 57 | image: imageName 58 | name: containerName 59 | env: env 60 | resources: { 61 | cpu: json(containerCpuCoreCount) 62 | memory: containerMemory 63 | } 64 | } 65 | ] 66 | scale: { 67 | minReplicas: 1 68 | maxReplicas: 10 69 | } 70 | } 71 | } 72 | dependsOn: [ 73 | containerRegistry 74 | ] 75 | } 76 | 77 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' existing = { 78 | name: containerAppsEnvironmentName 79 | } 80 | 81 | // 2022-02-01-preview needed for anonymousPullEnabled 82 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' existing = { 83 | name: containerRegistryName 84 | } 85 | 86 | output identityPrincipalId string = managedIdentity ? 
app.identity.principalId : '' 87 | output imageName string = imageName 88 | output name string = app.name 89 | output uri string = 'https://${app.properties.configuration.ingress.fqdn}' 90 | -------------------------------------------------------------------------------- /infra/core/host/container-apps-environment.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param logAnalyticsWorkspaceName string 6 | 7 | resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2022-03-01' = { 8 | name: name 9 | location: location 10 | tags: tags 11 | properties: { 12 | appLogsConfiguration: { 13 | destination: 'log-analytics' 14 | logAnalyticsConfiguration: { 15 | customerId: logAnalyticsWorkspace.properties.customerId 16 | sharedKey: logAnalyticsWorkspace.listKeys().primarySharedKey 17 | } 18 | } 19 | } 20 | } 21 | 22 | resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = { 23 | name: logAnalyticsWorkspaceName 24 | } 25 | 26 | output name string = containerAppsEnvironment.name 27 | -------------------------------------------------------------------------------- /infra/core/host/container-apps.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param containerAppsEnvironmentName string = '' 6 | param containerRegistryName string = '' 7 | param logAnalyticsWorkspaceName string = '' 8 | 9 | module containerAppsEnvironment 'container-apps-environment.bicep' = { 10 | name: '${name}-container-apps-environment' 11 | params: { 12 | name: containerAppsEnvironmentName 13 | location: location 14 | tags: tags 15 | logAnalyticsWorkspaceName: logAnalyticsWorkspaceName 16 | } 17 | } 18 | 19 | module containerRegistry 'container-registry.bicep' = { 
20 | name: '${name}-container-registry' 21 | params: { 22 | name: containerRegistryName 23 | location: location 24 | tags: tags 25 | } 26 | } 27 | 28 | output environmentName string = containerAppsEnvironment.outputs.name 29 | output registryLoginServer string = containerRegistry.outputs.loginServer 30 | output registryName string = containerRegistry.outputs.name 31 | -------------------------------------------------------------------------------- /infra/core/host/container-registry.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param adminUserEnabled bool = true 6 | param anonymousPullEnabled bool = false 7 | param dataEndpointEnabled bool = false 8 | param encryption object = { 9 | status: 'disabled' 10 | } 11 | param networkRuleBypassOptions string = 'AzureServices' 12 | param publicNetworkAccess string = 'Enabled' 13 | param sku object = { 14 | name: 'Basic' 15 | } 16 | param zoneRedundancy string = 'Disabled' 17 | 18 | @description('The log analytics workspace id used for logging & monitoring') 19 | param workspaceId string = '' 20 | 21 | // 2022-02-01-preview needed for anonymousPullEnabled 22 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = { 23 | name: name 24 | location: location 25 | tags: tags 26 | sku: sku 27 | properties: { 28 | adminUserEnabled: adminUserEnabled 29 | anonymousPullEnabled: anonymousPullEnabled 30 | dataEndpointEnabled: dataEndpointEnabled 31 | encryption: encryption 32 | networkRuleBypassOptions: networkRuleBypassOptions 33 | publicNetworkAccess: publicNetworkAccess 34 | zoneRedundancy: zoneRedundancy 35 | } 36 | } 37 | 38 | // TODO: Update diagnostics to be its own module 39 | // Blocking issue: https://github.com/Azure/bicep/issues/622 40 | // Unable to pass in a `resource` scope or unable to use string interpolation in resource types 41 | 
resource diagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = if (!empty(workspaceId)) { 42 | name: 'registry-diagnostics' 43 | scope: containerRegistry 44 | properties: { 45 | workspaceId: workspaceId 46 | logs: [ 47 | { 48 | category: 'ContainerRegistryRepositoryEvents' 49 | enabled: true 50 | } 51 | { 52 | category: 'ContainerRegistryLoginEvents' 53 | enabled: true 54 | } 55 | ] 56 | metrics: [ 57 | { 58 | category: 'AllMetrics' 59 | enabled: true 60 | timeGrain: 'PT1M' 61 | } 62 | ] 63 | } 64 | } 65 | 66 | output loginServer string = containerRegistry.properties.loginServer 67 | output name string = containerRegistry.name 68 | -------------------------------------------------------------------------------- /infra/core/monitor/applicationinsights.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 6 | name: name 7 | location: location 8 | tags: tags 9 | kind: 'web' 10 | properties: { 11 | Application_Type: 'web' 12 | } 13 | } 14 | 15 | output connectionString string = applicationInsights.properties.ConnectionString 16 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey 17 | output name string = applicationInsights.name 18 | -------------------------------------------------------------------------------- /infra/core/monitor/loganalytics.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { 6 | name: name 7 | location: location 8 | tags: tags 9 | properties: any({ 10 | retentionInDays: 30 11 | features: { 12 | searchVersion: 1 13 | } 14 | sku: { 15 | name: 
'PerGB2018' 16 | } 17 | }) 18 | } 19 | 20 | output id string = logAnalytics.id 21 | output name string = logAnalytics.name 22 | -------------------------------------------------------------------------------- /infra/core/monitor/monitoring.bicep: -------------------------------------------------------------------------------- 1 | param logAnalyticsName string 2 | param applicationInsightsName string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | var useApplicationInsights = !empty(applicationInsightsName) 7 | 8 | module logAnalytics 'loganalytics.bicep' = { 9 | name: 'loganalytics' 10 | params: { 11 | name: logAnalyticsName 12 | location: location 13 | tags: tags 14 | } 15 | } 16 | 17 | module applicationInsights 'applicationinsights.bicep' = if (useApplicationInsights) { 18 | name: 'applicationinsights' 19 | params: { 20 | name: applicationInsightsName 21 | location: location 22 | tags: tags 23 | } 24 | } 25 | 26 | output applicationInsightsConnectionString string = useApplicationInsights ? applicationInsights.outputs.connectionString : '' 27 | output applicationInsightsInstrumentationKey string = useApplicationInsights ? applicationInsights.outputs.instrumentationKey : '' 28 | output applicationInsightsName string = useApplicationInsights ? applicationInsights.outputs.name : '' 29 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name 30 | -------------------------------------------------------------------------------- /infra/core/security/role.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a role assignment for a service principal.' 
2 | param principalId string 3 | 4 | @allowed([ 5 | 'Device' 6 | 'ForeignGroup' 7 | 'Group' 8 | 'ServicePrincipal' 9 | 'User' 10 | ]) 11 | param principalType string = 'ServicePrincipal' 12 | param roleDefinitionId string 13 | 14 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 15 | name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId) 16 | properties: { 17 | principalId: principalId 18 | principalType: principalType 19 | roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infra/main.bicep: -------------------------------------------------------------------------------- 1 | targetScope = 'subscription' 2 | 3 | @minLength(1) 4 | @maxLength(64) 5 | @description('Name of the the environment which is used to generate a short unique hash used in all resources.') 6 | param environmentName string 7 | 8 | @minLength(1) 9 | @description('Primary location for all resources') 10 | param location string 11 | 12 | param appServicePlanName string = '' 13 | param backendServiceName string = '' 14 | param resourceGroupName string = '' 15 | 16 | param applicationInsightsDashboardName string = '' 17 | param applicationInsightsName string = '' 18 | param logAnalyticsName string = '' 19 | 20 | @allowed(['azure', 'openai']) 21 | param openAiHost string // Set in main.parameters.json 22 | 23 | param openAiServiceName string = '' 24 | param openAiResourceGroupName string = '' 25 | @description('Location for the OpenAI resource group') 26 | @allowed(['canadaeast', 'eastus', 'eastus2', 'francecentral', 'switzerlandnorth', 'uksouth', 'japaneast', 'northcentralus', 'australiaeast', 'swedencentral']) 27 | @metadata({ 28 | azd: { 29 | type: 'location' 30 | } 31 | }) 32 | param openAiResourceGroupLocation string 33 | 34 | param openAiSkuName string = 'S0' 35 | 36 | param chatGptDeploymentName string // Set 
in main.parameters.json 37 | param chatGptDeploymentCapacity int = 30 38 | param chatGptModelName string = (openAiHost == 'azure') ? 'gpt-35-turbo' : 'gpt-3.5-turbo' 39 | param chatGptModelVersion string = '0613' 40 | param embeddingDeploymentName string // Set in main.parameters.json 41 | param embeddingDeploymentCapacity int = 30 42 | param embeddingModelName string = 'text-embedding-ada-002' 43 | param openAiApiKey string // Set in main.parameters.json 44 | 45 | param frontendStreamlitName string = 'frontend' 46 | param frontendStreamlitImageName string = '' 47 | 48 | param backendApiName string = 'backend' 49 | param backendApiImageName string = '' 50 | 51 | param eventHubsNamespaceConnectionString string // Set in main.parameters.json 52 | 53 | @description('Use Application Insights for monitoring and performance tracing') 54 | param useApplicationInsights bool = false 55 | 56 | var abbrs = loadJsonContent('abbreviations.json') 57 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) 58 | var tags = { 'azd-env-name': environmentName } 59 | 60 | // Organize resources in a resource group 61 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { 62 | name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' 63 | location: location 64 | tags: tags 65 | } 66 | 67 | resource openAiResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(openAiResourceGroupName)) { 68 | name: !empty(openAiResourceGroupName) ? openAiResourceGroupName : resourceGroup.name 69 | } 70 | 71 | // Monitor application with Azure Monitor 72 | module monitoring './core/monitor/monitoring.bicep' = { 73 | name: 'monitoring' 74 | scope: resourceGroup 75 | params: { 76 | location: location 77 | tags: tags 78 | logAnalyticsName: '${abbrs.operationalInsightsWorkspaces}${resourceToken}' 79 | applicationInsightsName: useApplicationInsights ? 
'${abbrs.insightsComponents}${resourceToken}' : '' 80 | } 81 | } 82 | 83 | // Event Hubs 84 | module eventHubs './core/data/event-hubs.bicep' = { 85 | name: 'event-hubs' 86 | scope: resourceGroup 87 | params: { 88 | location: location 89 | } 90 | } 91 | 92 | // Container apps host (including container registry) 93 | module containerApps './core/host/container-apps.bicep' = { 94 | name: 'container-apps' 95 | scope: resourceGroup 96 | params: { 97 | name: 'containerapps' 98 | containerAppsEnvironmentName: '${abbrs.appManagedEnvironments}${resourceToken}' 99 | containerRegistryName: '${abbrs.containerRegistryRegistries}${resourceToken}' 100 | location: location 101 | tags: tags 102 | logAnalyticsWorkspaceName: monitoring.outputs.logAnalyticsWorkspaceName 103 | } 104 | } 105 | 106 | // The backend API 107 | module backendApi './core/host/container-app.bicep' = { 108 | name: 'backend-api' 109 | scope: resourceGroup 110 | params: { 111 | name: !empty(backendApiName) ? backendApiName : '${abbrs.appContainerApps}api-${resourceToken}' 112 | location: location 113 | tags: union(tags, { 'azd-service-name': 'backend' }) 114 | containerAppsEnvironmentName: containerApps.outputs.environmentName 115 | containerRegistryName: containerApps.outputs.registryName 116 | managedIdentity: true 117 | containerCpuCoreCount: '1.0' 118 | containerMemory: '2.0Gi' 119 | secrets: useApplicationInsights ? 
[ 120 | { 121 | name: 'appinsights-cs' 122 | value: monitoring.outputs.applicationInsightsConnectionString 123 | } 124 | ] : [] 125 | env: concat([ 126 | { 127 | name: 'AZURE_OPENAI_CHATGPT_DEPLOYMENT' 128 | value: chatGptDeploymentName 129 | } 130 | { 131 | name: 'AZURE_OPENAI_CHATGPT_MODEL' 132 | value: chatGptModelName 133 | } 134 | { 135 | name: 'AZURE_OPENAI_EMB_DEPLOYMENT' 136 | value: embeddingDeploymentName 137 | } 138 | { 139 | name: 'AZURE_OPENAI_EMBEDDING_MODEL' 140 | value: embeddingModelName 141 | } 142 | { 143 | name: 'AZURE_OPENAI_API_KEY' 144 | value: openAiApiKey 145 | } 146 | { 147 | name: 'AZURE_OPENAI_SERVICE' 148 | value: (openAiHost == 'azure') ? openAi.outputs.name : '' // openAi module is only deployed when openAiHost is 'azure' 149 | } 150 | { 151 | name: 'EVENT_HUBS_NAMESPACE_CONNECTION_STRING' 152 | value: eventHubsNamespaceConnectionString 153 | } 154 | ], useApplicationInsights ? [{ 155 | name: 'APPLICATIONINSIGHTS_CONNECTION_STRING' 156 | secretRef: 'appinsights-cs' 157 | }] : []) 158 | imageName: !empty(backendApiImageName) ? backendApiImageName : 'python:3.11' 159 | targetPort: 8080 160 | } 161 | } 162 | 163 | 164 | // The frontend UI 165 | module frontendStreamlit './core/host/container-app.bicep' = { 166 | name: 'frontend-streamlit' 167 | scope: resourceGroup 168 | params: { 169 | name: !empty(frontendStreamlitName) ? frontendStreamlitName : '${abbrs.appContainerApps}ui-${resourceToken}' 170 | location: location 171 | tags: union(tags, { 'azd-service-name': 'frontend' }) 172 | containerAppsEnvironmentName: containerApps.outputs.environmentName 173 | containerRegistryName: containerApps.outputs.registryName 174 | managedIdentity: true 175 | containerCpuCoreCount: '1.0' 176 | containerMemory: '2.0Gi' 177 | env: concat([ 178 | { 179 | name: 'BACKEND_API_URI' 180 | value: backendApi.outputs.uri 181 | } 182 | ]) 183 | imageName: !empty(frontendStreamlitImageName) ? 
frontendStreamlitImageName : 'python:3.11' 184 | targetPort: 8501 185 | } 186 | } 187 | 188 | 189 | module openAi 'core/ai/cognitiveservices.bicep' = if (openAiHost == 'azure') { 190 | name: 'openai' 191 | scope: openAiResourceGroup 192 | params: { 193 | name: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' 194 | location: openAiResourceGroupLocation 195 | tags: tags 196 | sku: { 197 | name: openAiSkuName 198 | } 199 | deployments: [ 200 | { 201 | name: chatGptDeploymentName 202 | model: { 203 | format: 'OpenAI' 204 | name: chatGptModelName 205 | version: chatGptModelVersion 206 | } 207 | sku: { 208 | name: 'Standard' 209 | capacity: chatGptDeploymentCapacity 210 | } 211 | } 212 | { 213 | name: embeddingDeploymentName 214 | model: { 215 | format: 'OpenAI' 216 | name: embeddingModelName 217 | version: '2' 218 | } 219 | sku: { name: 'Standard', capacity: embeddingDeploymentCapacity } // cognitiveservices.bicep only reads deployment.sku; a top-level capacity is ignored 220 | } 221 | ] 222 | } 223 | } 224 | 225 | output AZURE_LOCATION string = location 226 | output AZURE_TENANT_ID string = tenant().tenantId 227 | output AZURE_RESOURCE_GROUP string = resourceGroup.name 228 | 229 | output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registryLoginServer 230 | output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName 231 | 232 | // Shared by all OpenAI deployments 233 | output OPENAI_HOST string = openAiHost 234 | output AZURE_OPENAI_EMB_MODEL_NAME string = embeddingModelName 235 | output AZURE_OPENAI_CHATGPT_MODEL string = chatGptModelName 236 | // Specific to Azure OpenAI 237 | output AZURE_OPENAI_SERVICE string = (openAiHost == 'azure') ? openAi.outputs.name : '' // guarded like the outputs below: the openAi module is conditional 238 | output AZURE_OPENAI_RESOURCE_GROUP string = (openAiHost == 'azure') ? 
openAiResourceGroup.name : '' 239 | output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = (openAiHost == 'azure') ? chatGptDeploymentName : '' 240 | output AZURE_OPENAI_EMB_DEPLOYMENT string = (openAiHost == 'azure') ? 
embeddingDeploymentName : '' 241 | 242 | output BACKEND_API_URI string = backendApi.outputs.uri 243 | 244 | output EVENT_HUBS_NAMESPACE_CONNECTION_STRING string = eventHubsNamespaceConnectionString // NOTE(review): connection string is emitted as a plain deployment output; confirm this exposure is intended 245 | 246 | -------------------------------------------------------------------------------- /infra/main.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentName": { 6 | "value": "${AZURE_ENV_NAME}" 7 | }, 8 | "resourceGroupName": { 9 | "value": "${AZURE_RESOURCE_GROUP}" 10 | }, 11 | "location": { 12 | "value": "${AZURE_LOCATION}" 13 | }, 14 | "principalId": { 15 | "value": "${AZURE_PRINCIPAL_ID}" 16 | }, 17 | "openAiServiceName": { 18 | "value": "${AZURE_OPENAI_SERVICE}" 19 | }, 20 | "openAiResourceGroupName": { 21 | "value": "${AZURE_OPENAI_RESOURCE_GROUP}" 22 | }, 23 | "openAiSkuName": { 24 | "value": "S0" 25 | }, 26 | "chatGptDeploymentName": { 27 | "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT=chat}" 28 | }, 29 | "embeddingDeploymentName": { 30 | "value": "${AZURE_OPENAI_EMB_DEPLOYMENT=embedding}" 31 | }, 32 | "openAiHost":{ 33 | "value": "${OPENAI_HOST=azure}" 34 | }, 35 | "openAiApiKey": { 36 | "value": "${AZURE_OPENAI_API_KEY}" 37 | }, 38 | "openAiApiOrganization": { 39 | "value": "${OPENAI_ORGANIZATION}" 40 | }, 41 | "useApplicationInsights": { 42 | "value": "${AZURE_USE_APPLICATION_INSIGHTS=false}" 43 | }, 44 | "useAuthentication": { 45 | "value": "${AZURE_USE_AUTHENTICATION=false}" 46 | }, 47 | "serverAppId": { 48 | "value": "${AZURE_SERVER_APP_ID}" 49 | }, 50 | "serverAppSecret": { 51 | "value": "${AZURE_SERVER_APP_SECRET}" 52 | }, 53 | "clientAppId": { 54 | "value": "${AZURE_CLIENT_APP_ID}" 55 | }, 56 | "allowedOrigin": { 57 | "value": "${ALLOWED_ORIGIN}" 58 | }, 59 | "eventHubsNamespaceConnectionString": { 60 | "value": 
"${EVENT_HUBS_NAMESPACE_CONNECTION_STRING}" 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | python-dotenv 3 | pandas 4 | plotly 5 | pathway 6 | llm_app 7 | requests 8 | datetime 9 | kafka-python -------------------------------------------------------------------------------- /scripts/load_env.ps1: -------------------------------------------------------------------------------- 1 | Write-Host "Loading azd .env file from current environment" 2 | foreach ($line in (& azd env get-values)) { 3 | if ($line -match "([^=]+)=(.*)") { 4 | $key = $matches[1] 5 | $value = $matches[2] -replace '^"|"$' 6 | [Environment]::SetEnvironmentVariable($key, $value) 7 | } 8 | } -------------------------------------------------------------------------------- /scripts/load_env.sh: -------------------------------------------------------------------------------- 1 | echo "Loading azd .env file from current environment" 2 | 3 | while IFS='=' read -r key value; do 4 | value=$(echo "$value" | sed 's/^"//' | sed 's/"$//') 5 | export "$key=$value" 6 | done <