├── airflow
│   ├── dags
│   │   ├── custom_operators
│   │   │   ├── __init__.py
│   │   │   ├── mlflow_utils.py
│   │   │   ├── model_utils.py
│   │   │   ├── azure_utils.py
│   │   │   ├── custom_functions_kafka.py
│   │   │   └── custom_functions_model.py
│   │   ├── kafka_dag.py
│   │   └── model_dag.py
│   ├── Dockerfile
│   ├── requirements.txt
│   ├── setup.sh
│   ├── score.py
│   └── docker-compose.yaml
├── .gitignore
├── function_api
│   ├── example_api_call
│   │   ├── requirements.txt
│   │   ├── example_test_data.npy
│   │   └── example_api_call.py
│   ├── function_app
│   │   ├── requirements.txt
│   │   ├── function
│   │   │   ├── function.json
│   │   │   └── __init__.py
│   │   └── host.json
│   └── apimanagement.sh
├── mlflow
│   ├── docker-compose.yml
│   └── setup.sh
├── arm_templates
│   ├── mlstudio
│   │   ├── azureml_parameters.json
│   │   └── azureml_template.json
│   ├── kafka_infra
│   │   ├── kafka_infra_parameters.json
│   │   └── kafka_infra_template.json
│   ├── mlflow_infra
│   │   ├── mlflow_infra_parameters.json
│   │   └── mlflow_infra_template.json
│   ├── airbyte_infra
│   │   ├── airbyte_infra_parameters.json
│   │   └── airbyte_infra_template.json
│   ├── airflow_infra
│   │   ├── airflow_infra_parameters.json
│   │   ├── airflow_storage_private_endpoint_parameters.json
│   │   ├── airflow_mlstudio_private_endpoint_parameters.json
│   │   ├── airflow_storage_private_endpoint_template.json
│   │   ├── airflow_mlstudio_private_endpoint_template.json
│   │   ├── blob_storage_private_link_template.json
│   │   ├── airflow_infra_template.json
│   │   └── azureml_private_link_template.json
│   ├── vnet_peerings
│   │   ├── airbyte_kafka_vnet_peering_parameters.json
│   │   ├── kafka_airflow_vnet_peering_parameters.json
│   │   ├── airflow_mlflow_vnet_peering_parameters.json
│   │   └── vnet_peering_template.json
│   ├── function_app
│   │   ├── function_azureml_private_endpoint_parameters.json
│   │   ├── functionapp_vnet_template.json
│   │   ├── function_azureml_private_link_template.json
│   │   └── function_app_template.json
│   └── blob_storage
│       ├── blob_storage_template.json
│       └── temp_blob_storage_template.json
├── kafka
│   ├── docker-compose.yml
│   └── setup.sh
├── airbyte
│   ├── setup.sh
│   └── connection_setup.sh
├── scripts
│   └── roles_assignment.ps1
├── README.md
└── azure-pipelines.yml

/airflow/dags/custom_operators/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | **/__pycache__/
3 | *.pyc
4 | *.pyo
5 |
--------------------------------------------------------------------------------
/function_api/example_api_call/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.31.0
2 | numpy==1.23.5
3 |
--------------------------------------------------------------------------------
/function_api/function_app/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.31.0
2 | azure-functions==1.19.0
3 | azureml-core==1.56.0
4 |
--------------------------------------------------------------------------------
/airflow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:2.9.0-python3.10
2 | COPY requirements.txt .
3 | COPY score.py .
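# score.py is not executed inside the Airflow image; it is baked in so the
# model DAG can hand it to Azure ML as the AKS inference entry script
# (InferenceConfig(entry_script='score.py') in custom_functions_model.py)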
4 | RUN pip install --no-cache-dir -r requirements.txt -------------------------------------------------------------------------------- /function_api/example_api_call/example_test_data.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JuliuszB12/LSTM_Attention_redeployment_for_yahoo_stock_data/HEAD/function_api/example_api_call/example_test_data.npy -------------------------------------------------------------------------------- /mlflow/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | services: 3 | mlflow: 4 | image: ghcr.io/mlflow/mlflow:v2.12.1 5 | ports: 6 | - "5000:5000" 7 | volumes: 8 | - ./mlflow:/mlflow 9 | command: mlflow server --host 0.0.0.0 -------------------------------------------------------------------------------- /airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | pandas==2.2.2 3 | numpy==1.23.5 4 | matplotlib==3.8.4 5 | scikit-learn==1.4.2 6 | kafka-python==2.0.2 7 | mlflow==2.12.1 8 | tensorflow==2.15.1 9 | azure-identity==1.16.0 10 | azure-storage-blob==12.19.1 11 | azureml-core==1.56.0 12 | stockstats==0.6.2 13 | -------------------------------------------------------------------------------- /arm_templates/mlstudio/azureml_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlserving" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /arm_templates/kafka_infra/kafka_infra_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "value": "kafka" 7 | }, 8 | "adminPassword": { 9 | "value": "Kafka111@" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /arm_templates/mlflow_infra/mlflow_infra_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "value": "mlflow" 7 | }, 8 | "adminPassword": { 9 | "value": "Mlflow11@" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /arm_templates/airbyte_infra/airbyte_infra_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "value": "airbyte" 7 | }, 8 | "adminPassword": { 9 | "value": "Airbyte1@" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /arm_templates/airflow_infra/airflow_infra_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "value": "airflow" 7 | }, 8 | "adminPassword": { 9 | "value": "Airflow1@" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /function_api/function_app/function/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "bindings": [ 3 | { 4 | "authLevel": "function", 5 | "type": "httpTrigger", 6 | "direction": "in", 7 | "name": "req", 8 | "methods": ["post"] 9 | }, 10 | { 11 | "type": "http", 12 | "direction": "out", 13 | "name": "$return" 14 | } 15 | ], 16 | "scriptFile": "__init__.py" 17 | } 18 | -------------------------------------------------------------------------------- /arm_templates/airflow_infra/airflow_storage_private_endpoint_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "value": "airflow-vnet" 7 | }, 8 | "subnetName": { 9 | "value": "default" 10 | }, 11 | "privateEndpointName": { 12 | "value": "airflow-storage" 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /function_api/function_app/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingExcludedTypes": "Request", 6 | "samplingSettings": { 7 | "isEnabled": true 8 | } 9 | } 10 | }, 11 | "functionTimeout": "00:05:00", 12 | "extensions": { 13 | "http": { 14 | "routePrefix": "api", 15 | "maxOutstandingRequests": 100, 16 | "maxConcurrentRequests": 10 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /arm_templates/vnet_peerings/airbyte_kafka_vnet_peering_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnet1Name": { 6 | "value": "airbyte-vnet" 7 | }, 8 | "vnet2Name": { 9 | "value": "kafka-vnet" 10 | }, 11 | "peeringLink1to2Name": { 12 | "value": "airbyte-kafka" 13 | }, 14 | "peeringLink2to1Name": { 15 | "value": "kafka-airbyte" 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /arm_templates/vnet_peerings/kafka_airflow_vnet_peering_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnet1Name": { 6 | "value": "airflow-vnet" 7 | }, 8 | "vnet2Name": { 9 | "value": "kafka-vnet" 10 | }, 11 | "peeringLink1to2Name": { 12 | "value": "airflow-kafka" 13 | }, 14 | "peeringLink2to1Name": { 15 | "value": "kafka-airflow" 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /arm_templates/vnet_peerings/airflow_mlflow_vnet_peering_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnet1Name": { 6 | "value": "airflow-vnet" 7 | }, 8 | "vnet2Name": { 9 | "value": "mlflow-vnet" 10 | }, 11 | "peeringLink1to2Name": { 12 | "value": "airflow-mlflow" 13 | }, 14 | "peeringLink2to1Name": { 15 | "value": "mlflow-airflow" 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /arm_templates/airflow_infra/airflow_mlstudio_private_endpoint_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "value": "airflow-vnet" 7 | }, 8 | "subnetName": { 9 | "value": "default" 10 | }, 11 | "amlWorkspaceName": { 12 | "value": "mlserving" 13 | }, 14 | "privateEndpointName": { 15 | "value": "airflow-mlstudio" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /arm_templates/function_app/function_azureml_private_endpoint_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "value": "function-vnet" 7 | }, 8 | "subnetName": { 9 | "value": "default" 10 | }, 11 | "amlWorkspaceName": { 12 | "value": "mlserving" 13 | }, 14 | "privateEndpointName": { 15 | "value": "function-mlstudio" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /function_api/example_api_call/example_api_call.py: -------------------------------------------------------------------------------- 1 | # Python 3.10 2 | import sys 3 | import json 4 | import requests 5 | import numpy as np 6 | 7 | resourceGroupName = sys.argv[1] 8 | 9 | x = np.load('example_test_data.npy') 10 | data = [x[0].tolist()] 11 | body = json.dumps(data) 12 | 13 | url = f"https://{resourceGroupName}a1l45.azure-api.net/function/" 14 | data = {"inputs": body} 15 | # headers = {"Ocp-Apim-Subscription-Key": sys.argv[2]} # if auth enabled 16 | 17 | response = requests.post(url, json=data) 18 | # response = requests.post(url, json=data, headers=headers) 19 | 20 | print(response) 21 | print(round(json.loads(response.text)[0][0], 2)) 22 | -------------------------------------------------------------------------------- /kafka/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | zookeeper: 5 | image: docker.io/bitnami/zookeeper:3.9 6 | ports: 7 | - "2181:2181" 8 | volumes: 9 | - "zookeeper_data:/bitnami" 10 | environment: 11 | - ALLOW_ANONYMOUS_LOGIN=yes 12 | kafka: 13 | image: docker.io/bitnami/kafka:3.4 14 | ports: 15 | - "9092:9092" 16 | volumes: 17 | - "kafka_data:/bitnami" 18 | environment: 19 | - KAFKA_CFG_LISTENERS=PLAINTEXT://0.0.0.0:9092 20 | - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://10.1.0.4:9092 21 | - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 22 | - ALLOW_PLAINTEXT_LISTENER=yes 23 | - KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true 24 | depends_on: 25 | - zookeeper 26 | 27 | volumes: 28 | zookeeper_data: 29 | driver: local 30 | kafka_data: 31 | driver: local 32 | -------------------------------------------------------------------------------- 
/mlflow/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Updating packages..." 4 | sudo apt-get update -y 5 | 6 | echo "Installing required packages..." 7 | sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release -y 8 | 9 | echo "Configuring Docker repository..." 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg 11 | 12 | echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 13 | 14 | sudo apt-get update 15 | 16 | echo "Installing Docker..." 17 | sudo apt-get install docker-ce docker-ce-cli -y 18 | 19 | echo "Installing Docker Compose..." 20 | sudo apt-get install docker-compose-plugin -y 21 | 22 | echo "Starting MLflow..." 23 | sudo docker compose up 24 | 25 | echo "Setup complete!" 26 | -------------------------------------------------------------------------------- /kafka/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Updating packages..." 4 | sudo apt-get update -y 5 | 6 | echo "Installing required packages..." 7 | sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release -y 8 | 9 | echo "Configuring Docker repository..." 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg 11 | 12 | echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 13 | 14 | sudo apt-get update 15 | 16 | echo "Installing Docker..." 17 | sudo apt-get install docker-ce docker-ce-cli -y 18 | 19 | echo "Installing Docker Compose..." 20 | sudo apt-get install docker-compose-plugin -y 21 | 22 | echo "Starting Zookeeper and Kafka..." 23 | sudo docker compose up 24 | 25 | echo "Setup complete!" 26 | -------------------------------------------------------------------------------- /airbyte/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Updating packages..." 4 | sudo apt-get update -y 5 | 6 | echo "Installing required packages..." 7 | sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release -y 8 | 9 | echo "Configuring Docker repository..." 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg 11 | 12 | echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 13 | 14 | sudo apt-get update 15 | 16 | echo "Installing Docker..." 17 | sudo apt-get install docker-ce docker-ce-cli -y 18 | 19 | echo "Installing Docker Compose..." 20 | sudo apt-get install docker-compose-plugin -y 21 | 22 | echo "Getting Airbyte..." 23 | wget https://raw.githubusercontent.com/airbytehq/airbyte/master/run-ab-platform.sh 24 | 25 | echo "Running Airbyte..." 26 | chmod +x run-ab-platform.sh 27 | sudo ./run-ab-platform.sh -b 28 | 29 | echo "Creating connection..." 
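# NOTE: the fixed 5-minute sleep below only gives the Airbyte server time to
# start before the connection bootstrap runs. Polling the health endpoint
# would be more robust, e.g. (sketch):
#   until curl -su airbyte:password http://localhost:8000/api/v1/health | grep -q available; do sleep 10; done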
30 | sleep 300
31 | chmod +x ./connection_setup.sh
32 | ./connection_setup.sh
33 |
--------------------------------------------------------------------------------
/airflow/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "Updating packages..."
4 | sudo apt-get update -y
5 |
6 | echo "Installing required packages..."
7 | sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release -y
8 |
9 | echo "Configuring Docker repository..."
10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
11 |
12 | echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
13 |
14 | sudo apt-get update
15 |
16 | echo "Installing Docker..."
17 | sudo apt-get install docker-ce docker-ce-cli -y
18 |
19 | echo "Installing Docker Compose..."
20 | sudo apt-get install docker-compose-plugin -y
21 |
22 | echo "Creating directories for configuration and logs..."
23 | mkdir -p ./logs ./plugins ./config
24 |
25 | echo "Setting up environment variables..."
26 | echo -e "AIRFLOW_UID=$(id -u)" > .env
27 |
28 | echo "Initializing Airflow..."
29 | sudo docker compose up airflow-init --build
30 |
31 | echo "Starting Airflow..."
32 | sudo docker compose up --build
33 |
34 | echo "Setup complete!"
35 |
--------------------------------------------------------------------------------
/airflow/dags/kafka_dag.py:
--------------------------------------------------------------------------------
1 | from airflow import DAG
2 | from airflow.operators.python import PythonOperator  # airflow.operators.python_operator is deprecated in Airflow 2.x
3 | from datetime import datetime, timedelta
4 | from custom_operators.custom_functions_kafka import consume_kafka_task, upload_blob_task
5 |
6 |
7 | default_args = {
8 |     'owner': 'airflow',
9 |     'depends_on_past': False,
10 |     'start_date': datetime(2023, 1, 1),
11 |     'email_on_failure': False,
12 |     'email_on_retry': False,
13 |     'retries': 0,
14 |     'retry_delay': timedelta(minutes=1),
15 | }
16 |
17 | dag = DAG(
18 |     'kafka_and_azure_blob_dag',
19 |     default_args=default_args,
20 |     description='DAG for consuming Kafka messages and uploading to Blob Storage',
21 |     schedule=timedelta(minutes=1),
22 |     is_paused_upon_creation=False,
23 |     catchup=False
24 | )
25 |
26 | consume_task = PythonOperator(
27 |     task_id='consume_kafka_task',
28 |     python_callable=consume_kafka_task,
29 |     execution_timeout=timedelta(minutes=1),
30 |     dag=dag
31 | )
32 |
33 | upload_task = PythonOperator(
34 |     task_id='upload_blob_task',
35 |     python_callable=upload_blob_task,
36 |     execution_timeout=timedelta(minutes=1),
37 |     dag=dag
38 | )
39 |
40 | consume_task >> upload_task
41 |
--------------------------------------------------------------------------------
/airflow/score.py:
--------------------------------------------------------------------------------
1 | import json
2 | import joblib
3 | import numpy as np
4 | import mlflow.pyfunc
5 | from azureml.core.model import Model
6 |
7 |
8 | def init():
9 |     # Usage of global variables is imposed by Azure documentation
10 |     global model
11 |     global scaler
12 |     global y_scaler
13 |     global tickers_len
14 |     model_path = Model.get_model_path('LSTM_Attention_stock_price_regression')
15 |     model = mlflow.pyfunc.load_model(model_path)
16 |     # scaler.joblib, y_scaler.joblib and tickers_len.txt are placed at the
17 |     # model root by auth_ws_register_model() in custom_operators/azure_utils.py
18 |     scaler = joblib.load(model_path + '/scaler.joblib')
19 |     y_scaler = joblib.load(model_path + '/y_scaler.joblib')
20 |     with open(model_path + '/tickers_len.txt', 'r') as file:
21 |         tickers_len = file.read()
22 |     tickers_len = int(tickers_len)
23 |
24 |
25 | def run(raw_data):
26 |     data = json.loads(raw_data)
27 |     array = np.array(data)
28 |     # Only the feature columns after the one-hot ticker flags are min-max scaled
29 |     data_to_scale = array[:, :, tickers_len:].reshape(-1, array.shape[-1] - tickers_len)
30 |     scaled_data = scaler.transform(data_to_scale)
31 |     array[:, :, tickers_len:] = scaled_data.reshape(array.shape[0], array.shape[1], -1)
32 |     result = model.predict(array)
33 |     result = y_scaler.inverse_transform(result)
34 |     result = json.dumps(result.tolist())
35 |     return result
36 |
--------------------------------------------------------------------------------
/function_api/function_app/function/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import azure.functions as func
3 | from azureml.core.authentication import MsiAuthentication
4 | from azureml.core.webservice import AksWebservice
5 | from azureml.core import Workspace
6 |
7 |
8 | def main(req: func.HttpRequest) -> func.HttpResponse:
9 |     try:
10 |         # Try to get JSON from the request body
11 |         req_body = req.get_json()
12 |     except ValueError:
13 |         # If JSON parsing fails, return a 400 error
14 |         return func.HttpResponse("Invalid JSON", status_code=400)
15 |
16 |     inputs = req_body.get('inputs')
17 |
18 |     # subscriptionId and resourceGroup are expected as application settings
19 |     subscriptionId = os.getenv('subscriptionId')
20 |     resourceGroup = os.getenv('resourceGroup')
21 |     msi_auth = MsiAuthentication()
22 |     ws = Workspace(subscription_id=subscriptionId,
23 |                    resource_group=resourceGroup,
24 |                    workspace_name="mlserving",
25 |                    auth=msi_auth)
26 |     service = AksWebservice(ws, 'lstm-service')
27 |     response = service.run(input_data=inputs)
28 |
29 |     if response:
30 |         return func.HttpResponse(response, status_code=200)
31 |     else:
32 |         return func.HttpResponse("Please pass input in the request body", status_code=400)
33 |
--------------------------------------------------------------------------------
/scripts/roles_assignment.ps1:
--------------------------------------------------------------------------------
1 | param (
2 |     [string]$resourceGroupName,
3 |     [string]$storageAccountName,
4 |     [string]$functionAppName
5 | )
6 |
7 | $vmName = "airflow"
8 | $amlWorkspaceName = "mlserving"
9 | $vm = Get-AzVM -ResourceGroupName $resourceGroupName -Name $vmName
10 | $storageAccount = Get-AzStorageAccount -ResourceGroupName $resourceGroupName -Name $storageAccountName
11 | $functionApp = Get-AzWebApp -ResourceGroupName $resourceGroupName -Name $functionAppName
12 | $amlWorkspace = Get-AzResource -ResourceGroupName $resourceGroupName -ResourceType "Microsoft.MachineLearningServices/workspaces" -Name $amlWorkspaceName
13 | $vmPrincipalId = $vm.Identity.PrincipalId  # principal ID of the VM's system-assigned identity
14 | $storageAccountResourceId = $storageAccount.Id
15 | $amlWorkspaceResourceId = $amlWorkspace.ResourceId
16 | $principalId = $functionApp.Identity.PrincipalId
17 | New-AzRoleAssignment -ObjectId $vmPrincipalId -RoleDefinitionName "Contributor" -Scope $storageAccountResourceId
18 | New-AzRoleAssignment -ObjectId $vmPrincipalId -RoleDefinitionName "Storage Blob Data Contributor" -Scope $storageAccountResourceId
19 | New-AzRoleAssignment -ObjectId $vmPrincipalId -RoleDefinitionName "Contributor" -Scope $amlWorkspaceResourceId
20 | New-AzRoleAssignment -ObjectId $principalId -RoleDefinitionName "Contributor" -Scope $amlWorkspaceResourceId
21 |
--------------------------------------------------------------------------------
/airflow/dags/model_dag.py:
--------------------------------------------------------------------------------
1 | from airflow import DAG
2 | from airflow.operators.python import PythonOperator
3 | from datetime import datetime, timedelta
4 | from custom_operators.custom_functions_model import train_model_task, deploy_azureml_task
5 |
6 |
7 | default_args = {
8 |     'owner': 'airflow',
9 |     'depends_on_past': False,
10 |     'start_date': datetime(2023, 1, 1),
11 |     'email_on_failure': False,
12 |     'email_on_retry': False,
13 |     'retries': 0,
14 |     'retry_delay': timedelta(minutes=15),
15 | }
16 |
17 | dag = DAG(
18 |     'train_and_deploy_model_dag',
19 |     default_args=default_args,
20 |     description='DAG for training the model and deploying it to an Azure ML endpoint',
21 |     schedule=timedelta(minutes=15),
22 |     is_paused_upon_creation=False,
23 |     max_active_runs=1,
24 |     catchup=False
25 | )
26 |
27 | # Airflow 2 passes the task context to callables automatically,
28 | # so provide_context is no longer needed on these operators
29 | train_task = PythonOperator(
30 |     task_id='train_model_task',
31 |     python_callable=train_model_task,
32 |     execution_timeout=timedelta(minutes=10),
33 |     dag=dag
34 | )
35 |
36 | deploy_task = PythonOperator(
37 |     task_id='deploy_azureml_task',
38 |     python_callable=deploy_azureml_task,
39 |     execution_timeout=timedelta(minutes=55),
40 |     dag=dag
41 | )
42 |
43 | train_task >> deploy_task
44 |
--------------------------------------------------------------------------------
/arm_templates/airflow_infra/airflow_storage_private_endpoint_template.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
3 |   "contentVersion": "1.0.0.0",
4 |   "parameters": {
5 |     "vnetName": {
6 |       "type": "string"
7 |     },
8 |     "subnetName": {
9 |       "type": "string"
10 |     },
11 |     "storageAccountName": {
12 |       "type": "string"
13 |     },
14 |     "privateEndpointName": {
15 |       "type": "string"
16 |     }
17 |   },
18 |   "variables": {
19 |     "location": "[resourceGroup().location]"
20 |   },
21 |   "resources": [
22 |     {
23 |       "type": "Microsoft.Network/privateEndpoints",
24 |       "apiVersion": "2020-06-01",
25 |       "name": "[parameters('privateEndpointName')]",
26 |       "location": "[variables('location')]",
27 |       "properties": {
28 |         "subnet": {
29 |           "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('subnetName'))]"
30 |         },
31 |         "privateLinkServiceConnections": [
32 |           {
33 |             "name": "myPrivateLinkServiceConnection",
34 |             "properties": {
35 |               "privateLinkServiceId": "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]",
36 |               "groupIds": ["blob"],
37 |               "requestMessage": "Please approve my connection"
38 |             }
39 |           }
40 |         ],
41 |         "manualPrivateLinkServiceConnections": []
42 |       }
43 |     }
44 |   ]
45 | }
46 |
--------------------------------------------------------------------------------
/arm_templates/blob_storage/blob_storage_template.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
3 |   "contentVersion": "1.0.0.0",
4 |   "parameters": {
5 |     "storageAccountName": {
6 |       "type": "string",
7 |       "metadata": {
8 |         "description": "The name of the storage account."
9 |       }
10 |     },
11 |     "containerName": {
12 |       "type": "string",
13 |       "metadata": {
14 |         "description": "The name of the container in the storage account."
15 | } 16 | } 17 | }, 18 | "resources": [ 19 | { 20 | "type": "Microsoft.Storage/storageAccounts", 21 | "apiVersion": "2019-06-01", 22 | "name": "[parameters('storageAccountName')]", 23 | "location": "[resourceGroup().location]", 24 | "sku": { 25 | "name": "Standard_LRS" 26 | }, 27 | "kind": "StorageV2", 28 | "properties": { 29 | "networkAcls": { 30 | "bypass": "AzureServices", 31 | "virtualNetworkRules": [], 32 | "ipRules": [], 33 | "defaultAction": "Deny" 34 | } 35 | }, 36 | "resources": [ 37 | { 38 | "type": "blobServices/containers", 39 | "apiVersion": "2019-06-01", 40 | "name": "[concat('default/', parameters('containerName'))]", 41 | "dependsOn": [ 42 | "[concat('Microsoft.Storage/storageAccounts/', parameters('storageAccountName'))]" 43 | ], 44 | "properties": {} 45 | } 46 | ] 47 | } 48 | ] 49 | } 50 | 51 | -------------------------------------------------------------------------------- /arm_templates/airflow_infra/airflow_mlstudio_private_endpoint_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "type": "string" 7 | }, 8 | "subnetName": { 9 | "type": "string" 10 | }, 11 | "amlWorkspaceName": { 12 | "type": "string" 13 | }, 14 | "privateEndpointName": { 15 | "type": "string" 16 | } 17 | }, 18 | "variables": { 19 | "location": "[resourceGroup().location]" 20 | }, 21 | "resources": [ 22 | { 23 | "type": "Microsoft.Network/privateEndpoints", 24 | "apiVersion": "2020-06-01", 25 | "name": "[parameters('privateEndpointName')]", 26 | "location": "[variables('location')]", 27 | "properties": { 28 | "subnet": { 29 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('subnetName'))]" 30 | }, 31 | "privateLinkServiceConnections": [ 32 | { 33 | "name": "[concat('myAmlPrivateLink', parameters('vnetName'))]", 34 | "properties": { 35 | "privateLinkServiceId": "[resourceId('Microsoft.MachineLearningServices/workspaces', parameters('amlWorkspaceName'))]", 36 | "groupIds": ["amlworkspace"], 37 | "requestMessage": "Please approve my connection to AML Workspace" 38 | } 39 | } 40 | ], 41 | "manualPrivateLinkServiceConnections": [] 42 | } 43 | } 44 | ] 45 | } 46 | 47 | -------------------------------------------------------------------------------- /function_api/apimanagement.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Assigning parameters to variables 4 | RESOURCE_GROUP=$1 5 | LOCATION=$2 6 | APIM_NAME=$3 7 | FUNCTION_APP_NAME=$4 8 | API_NAME="function" 9 | 10 | # Create API Management instance 11 | az apim create \ 12 | --resource-group $RESOURCE_GROUP \ 13 | --name $APIM_NAME \ 14 | --location $LOCATION \ 15 | --publisher-email "randomemail7564231@gmail.com" \ 16 | --publisher-name "Publisher Name" 17 | 18 | # Get the function key 19 | FUNCTION_KEY=$(az functionapp function keys list \ 20 | --resource-group $RESOURCE_GROUP \ 21 | --name $FUNCTION_APP_NAME \ 22 | --function-name $API_NAME \ 23 | --query "default" -o tsv) 24 | 25 | # Create API in API Management 26 | az apim api create \ 27 | --resource-group $RESOURCE_GROUP \ 28 | --service-name $APIM_NAME \ 29 | --api-id $API_NAME \ 30 | --path $API_NAME \ 31 | --display-name $API_NAME \ 32 | --service-url "https://$FUNCTION_APP_NAME.azurewebsites.net/api/$API_NAME" \ 33 | --protocols https 34 | 35 | # Create an 
operation in the API
36 | az apim api operation create \
37 |   --resource-group $RESOURCE_GROUP \
38 |   --service-name $APIM_NAME \
39 |   --api-id $API_NAME \
40 |   --operation-id "post-operation" \
41 |   --display-name "Post Operation" \
42 |   --method POST \
43 |   --url-template "/"
44 |
45 | # Create a named value in API Management for the function key
46 | az apim nv create \
47 |   --resource-group $RESOURCE_GROUP \
48 |   --service-name $APIM_NAME \
49 |   --named-value-id "function-key" \
50 |   --display-name "FunctionKey" \
51 |   --value $FUNCTION_KEY \
52 |   --secret true
53 |
--------------------------------------------------------------------------------
/airflow/dags/custom_operators/mlflow_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | import numpy as np
3 | from sklearn.metrics import mean_squared_error
4 | import mlflow
5 | from mlflow.tracking import MlflowClient
6 |
7 |
8 | def get_latest_model_version(model_name: str, client: MlflowClient) -> Optional[str]:
9 |     versions = client.search_model_versions(f"name='{model_name}'")
10 |     if not versions:  # guard: max() would raise ValueError on an empty result
11 |         return None
12 |     latest_version = max(versions, key=lambda version: int(version.version))
13 |     return latest_version.version
14 |
15 |
16 | def compare_and_update_production_stage(model_name: str, mse: float, x_val:
17 |                                         np.ndarray[np.ndarray[np.ndarray[np.float64]]], y_val: np.ndarray[np.float64]) -> str:
18 |     client = MlflowClient()
19 |     try:
20 |         prod_version = client.get_model_version_by_alias(model_name, "production").version
21 |     except mlflow.exceptions.RestException:
22 |         prod_version = None
23 |     if prod_version is not None:
24 |         model_uri = f"models:/{model_name}/{prod_version}"
25 |         loaded_model = mlflow.pyfunc.load_model(model_uri)
26 |         predictions_prev_model = loaded_model.predict(x_val)
27 |         mse_prev_model = mean_squared_error(y_val, predictions_prev_model)
28 |         if mse < mse_prev_model:
29 |             better_version = get_latest_model_version(model_name, client)
30 |             client.set_registered_model_alias(model_name, "production", better_version)
31 |             return 'new_version'
32 |         else:
33 |             versions = client.search_model_versions(f"name='{model_name}'")
34 |             num_versions = len(versions)
35 |             if num_versions < 5:  # while few versions exist, still trigger deployment (production alias unchanged)
36 |                 return 'new_version'
37 |             else:
38 |                 return 'old_version'
39 |     else:
40 |         better_version = get_latest_model_version(model_name, client)
41 |         client.set_registered_model_alias(model_name, "production", better_version)
42 |         return 'new_model'
43 |
--------------------------------------------------------------------------------
/arm_templates/vnet_peerings/vnet_peering_template.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
3 |   "contentVersion": "1.0.0.0",
4 |   "parameters": {
5 |     "vnet1Name": {
6 |       "type": "string",
7 |       "metadata": {
8 |         "description": "The name of the first Virtual Network."
9 |       }
10 |     },
11 |     "vnet2Name": {
12 |       "type": "string",
13 |       "metadata": {
14 |         "description": "The name of the second Virtual Network."
15 |       }
16 |     },
17 |     "peeringLink1to2Name": {
18 |       "type": "string",
19 |       "metadata": {
20 |         "description": "The name of the VNet peering from VNet1 to VNet2."
21 |       }
22 |     },
23 |     "peeringLink2to1Name": {
24 |       "type": "string",
25 |       "metadata": {
26 |         "description": "The name of the VNet peering from VNet2 to VNet1."
27 | } 28 | } 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Network/virtualNetworks/virtualNetworkPeerings", 33 | "name": "[concat(parameters('vnet1Name'), '/', parameters('peeringLink1to2Name'))]", 34 | "apiVersion": "2021-02-01", 35 | "location": "[resourceGroup().location]", 36 | "properties": { 37 | "allowVirtualNetworkAccess": true, 38 | "allowForwardedTraffic": true, 39 | "allowGatewayTransit": false, 40 | "useRemoteGateways": false, 41 | "remoteVirtualNetwork": { 42 | "id": "[resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks', parameters('vnet2Name'))]" 43 | } 44 | } 45 | }, 46 | { 47 | "type": "Microsoft.Network/virtualNetworks/virtualNetworkPeerings", 48 | "name": "[concat(parameters('vnet2Name'), '/', parameters('peeringLink2to1Name'))]", 49 | "apiVersion": "2021-02-01", 50 | "location": "[resourceGroup().location]", 51 | "properties": { 52 | "allowVirtualNetworkAccess": true, 53 | "allowForwardedTraffic": true, 54 | "allowGatewayTransit": false, 55 | "useRemoteGateways": false, 56 | "remoteVirtualNetwork": { 57 | "id": "[resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks', parameters('vnet1Name'))]" 58 | } 59 | } 60 | } 61 | ] 62 | } 63 | -------------------------------------------------------------------------------- /arm_templates/function_app/functionapp_vnet_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "variables": { 5 | "vNetName": "function-vnet", 6 | "location": "[resourceGroup().location]" 7 | }, 8 | "resources": [ 9 | { 10 | "type": "Microsoft.Network/virtualNetworks", 11 | "apiVersion": "2023-11-01", 12 | "name": "[variables('vNetName')]", 13 | "location": "[variables('location')]", 14 | "properties": { 15 | "addressSpace": { 16 | "addressPrefixes": [ 17 | "10.5.0.0/16" 18 | ] 19 | }, 20 | "encryption": { 21 | "enabled": false, 22 | "enforcement": "AllowUnencrypted" 23 | }, 24 | "subnets": [ 25 | { 26 | "name": "default", 27 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('vNetName'), 'default')]", 28 | "properties": { 29 | "addressPrefixes": [ 30 | "10.5.0.0/24" 31 | ], 32 | "delegations": [], 33 | "privateEndpointNetworkPolicies": "Disabled", 34 | "privateLinkServiceNetworkPolicies": "Enabled" 35 | }, 36 | "type": "Microsoft.Network/virtualNetworks/subnets" 37 | } 38 | ], 39 | "virtualNetworkPeerings": [], 40 | "enableDdosProtection": false 41 | } 42 | }, 43 | { 44 | "type": "Microsoft.Network/virtualNetworks/subnets", 45 | "apiVersion": "2023-11-01", 46 | "name": "[concat(variables('vNetName'), '/default')]", 47 | "dependsOn": [ 48 | "[resourceId('Microsoft.Network/virtualNetworks', variables('vNetName'))]" 49 | ], 50 | "properties": { 51 | "addressPrefixes": [ 52 | "10.5.0.0/24" 53 | ], 54 | "delegations": [], 55 | "privateEndpointNetworkPolicies": "Disabled", 56 | "privateLinkServiceNetworkPolicies": "Enabled" 57 | } 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /arm_templates/airflow_infra/blob_storage_private_link_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "storageAccountName": { 6 | "type": "string" 7 | } 8 | }, 9 | "variables": { 10 
| "privateDnsZonesName": "privatelink.blob.core.windows.net", 11 | "virtualNetworkExternalID": "[concat('/subscriptions/', subscription().subscriptionId, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.Network/virtualNetworks/airflow-vnet')]" 12 | }, 13 | "resources": [ 14 | { 15 | "type": "Microsoft.Network/privateDnsZones", 16 | "apiVersion": "2018-09-01", 17 | "name": "[variables('privateDnsZonesName')]", 18 | "location": "global", 19 | "properties": { 20 | "maxNumberOfRecordSets": 25000, 21 | "maxNumberOfVirtualNetworkLinks": 1000, 22 | "maxNumberOfVirtualNetworkLinksWithRegistration": 100, 23 | "numberOfRecordSets": 2, 24 | "numberOfVirtualNetworkLinks": 1, 25 | "numberOfVirtualNetworkLinksWithRegistration": 0, 26 | "provisioningState": "Succeeded" 27 | } 28 | }, 29 | { 30 | "type": "Microsoft.Network/privateDnsZones/A", 31 | "apiVersion": "2018-09-01", 32 | "name": "[concat(variables('privateDnsZonesName'), '/', parameters('storageAccountName'))]", 33 | "dependsOn": [ 34 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZonesName'))]" 35 | ], 36 | "properties": { 37 | "ttl": 3600, 38 | "aRecords": [ 39 | { 40 | "ipv4Address": "10.3.0.5" 41 | } 42 | ] 43 | } 44 | }, 45 | { 46 | "type": "Microsoft.Network/privateDnsZones/SOA", 47 | "apiVersion": "2018-09-01", 48 | "name": "[concat(variables('privateDnsZonesName'), '/@')]", 49 | "dependsOn": [ 50 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZonesName'))]" 51 | ], 52 | "properties": { 53 | "ttl": 3600, 54 | "soaRecord": { 55 | "email": "azureprivatedns-host.microsoft.com", 56 | "expireTime": 2419200, 57 | "host": "azureprivatedns.net", 58 | "minimumTtl": 10, 59 | "refreshTime": 3600, 60 | "retryTime": 300, 61 | "serialNumber": 1 62 | } 63 | } 64 | }, 65 | { 66 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 67 | "apiVersion": "2018-09-01", 68 | "name": "[concat(variables('privateDnsZonesName'), '/rkuuapfvmw5vk')]", 69 | "location": "global", 70 | "dependsOn": [ 71 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZonesName'))]" 72 | ], 73 | "properties": { 74 | "registrationEnabled": false, 75 | "virtualNetwork": { 76 | "id": "[variables('virtualNetworkExternalID')]" 77 | } 78 | } 79 | } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /airbyte/connection_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Fetch Airbyte workspace ID 4 | workspace_id=$(curl -u airbyte:password -X POST http://localhost:8000/api/v1/workspaces/list \ 5 | -H "Content-Type: application/json" \ 6 | -d '{}' | python3 -c "import sys, json; print(json.load(sys.stdin)['workspaces'][0]['workspaceId'])") 7 | 8 | # Create Yahoo Finance Price source with the Airbyte workspace ID 9 | source_id=$(curl -u airbyte:password -X POST http://localhost:8000/api/v1/sources/create \ 10 | -H "Content-Type: application/json" \ 11 | -d '{ 12 | "sourceDefinitionId": "09a517d3-803f-448d-97bf-0b1ee64b90ef", 13 | "workspaceId": "'$workspace_id'", 14 | "connectionConfiguration": { 15 | "tickers": "AMZN, AXP", 16 | "interval": "1d", 17 | "range": "1y" 18 | }, 19 | "name": "Yahoo source" 20 | }' | python3 -c "import sys, json; print(json.load(sys.stdin)['sourceId'])") 21 | 22 | # Create Kafka destination with the Airbyte workspace ID 23 | destination_id=$(curl -u airbyte:password -X POST http://localhost:8000/api/v1/destinations/create \ 24 | -H "Content-Type: 
application/json" \ 25 | -d '{ 26 | "destinationDefinitionId": "9f760101-60ae-462f-9ee6-b7a9dafd454d", 27 | "workspaceId": "'$workspace_id'", 28 | "connectionConfiguration": { 29 | "acks": "all", 30 | "batch_size": 16384, 31 | "bootstrap_servers": "10.1.0.4:9092", 32 | "buffer_memory": "33554432", 33 | "client_dns_lookup": "use_all_dns_ips", 34 | "compression_type": "none", 35 | "delivery_timeout_ms": 120000, 36 | "enable_idempotence": true, 37 | "linger_ms": "1", 38 | "max_block_ms": "60000", 39 | "max_in_flight_requests_per_connection": 5, 40 | "max_request_size": 1048576, 41 | "protocol": { 42 | "security_protocol": "PLAINTEXT" 43 | }, 44 | "receive_buffer_bytes": -1, 45 | "request_timeout_ms": 30000, 46 | "retries": 2147483647, 47 | "send_buffer_bytes": -1, 48 | "socket_connection_setup_timeout_ms": "10000", 49 | "socket_connection_setup_timeout_max_ms": "30000", 50 | "topic_pattern": "stock", 51 | "client_id": "airbyte-producer", 52 | "sync_producer": false, 53 | "test_topic": "testing" 54 | }, 55 | "name": "Kafka destination" 56 | }' | python3 -c "import sys, json; print(json.load(sys.stdin)['destinationId'])") 57 | 58 | # Create connection between created source and destination 59 | curl -u airbyte:password -X POST http://localhost:8000/api/v1/connections/create \ 60 | -H "Content-Type: application/json" \ 61 | -d '{ 62 | "sourceId": "'$source_id'", 63 | "destinationId": "'$destination_id'", 64 | "syncCatalog": { 65 | "streams": [ 66 | { 67 | "stream": { 68 | "name": "price", 69 | "jsonSchema": { 70 | "type": "object", 71 | "properties": { 72 | "chart": { 73 | "type": "object", 74 | "properties": { 75 | "result": { 76 | "type": "array", 77 | "items": {} 78 | } 79 | } 80 | } 81 | } 82 | }, 83 | "supportedSyncModes": ["full_refresh"], 84 | "sourceDefinedCursor": false, 85 | "defaultCursorField": [], 86 | "sourceDefinedPrimaryKey": [], 87 | "namespace": null 88 | }, 89 | "config": { 90 | "syncMode": "full_refresh", 91 | "cursorField": [], 92 | "destinationSyncMode": "append", 93 | "primaryKey": [], 94 | "selected": true, 95 | "aliasName": "alias_price" 96 | } 97 | } 98 | ] 99 | }, 100 | "scheduleType" : "cron", 101 | "scheduleData": { 102 | "cron": { 103 | "cronExpression": "*/7 * * * * ?", 104 | "cronTimeZone": "UTC" 105 | } 106 | }, 107 | "status": "active", 108 | "name": "Yahoo to Kafka" 109 | }' 110 | -------------------------------------------------------------------------------- /airflow/dags/custom_operators/model_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | import pandas as pd 3 | import numpy as np 4 | from stockstats import wrap 5 | from sklearn.preprocessing import MinMaxScaler 6 | from tensorflow.keras.layers import Layer, Softmax, Input, LSTM, Dense 7 | import tensorflow.keras.backend as K 8 | from tensorflow.keras.models import Model 9 | 10 | model_name = "LSTM_Attention_stock_price_regression" 11 | 12 | 13 | def preprocess_data(sequence_length: int, read_blob: Callable[[str], pd.DataFrame], tickers: list[str], account_name: str, 14 | container_name: str) -> \ 15 | tuple[np.ndarray[np.ndarray[np.ndarray[np.float64]]], np.ndarray[np.float64], MinMaxScaler, MinMaxScaler]: 16 | """ 17 | | Params:\n 18 | | sequence_length - length of LSTM time series\n 19 | | read_blob - function returning Pandas DataFrame from Azure Blob Storage data\n 20 | | tickers - list of tickers for stock data\n 21 | | account_name - name of Azure Blob Storage Account\n 22 | container_name - name of container in Azure 
Blob Storage
23 |     """
24 |     df = read_blob(account_name, container_name)
25 |     df = df.rename(columns={'timestamp': 'date'})
26 |     df = pd.get_dummies(df, columns=['symbol'], prefix='', prefix_sep='')
27 |     df[tickers] = df[tickers].astype(int)
28 |     lower_tickers = [x.lower() for x in tickers]
29 |     x_lstm = []
30 |     y_lstm = []
31 |
32 |     for i in tickers:
33 |         temp_df = df[df[i] == 1].copy()
34 |         if len(temp_df) % sequence_length == 0:
35 |             temp_df = temp_df.iloc[sequence_length-3:]
36 |         elif len(temp_df) % sequence_length == 1:
37 |             temp_df = temp_df.iloc[sequence_length-2:]
38 |         elif len(temp_df) % sequence_length == 2:
39 |             temp_df = temp_df.iloc[sequence_length-1:]
40 |         elif len(temp_df) > sequence_length:
41 |             rows_to_drop = len(temp_df) % sequence_length
42 |             temp_df = temp_df.iloc[rows_to_drop-3:]
43 |         else:
44 |             raise Exception("Too short data")
45 |         temp_df = wrap(temp_df)
46 |         temp_df = temp_df[lower_tickers+['close', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
47 |                                          'close_30_sma', 'close_60_sma', 'aroon']]
48 |         temp_df["Y"] = temp_df['close'].shift(-3)
49 |         temp_df = temp_df.fillna(0)
50 |         temp_df = temp_df.iloc[:-3]
51 |         x_train = temp_df.drop(['Y'], axis=1)
52 |         y_train = temp_df['Y'].to_numpy().reshape(-1, 1)
53 |         scaler = MinMaxScaler()  # NOTE: re-fit for every ticker, so the returned scalers hold the last ticker's fit
54 |         columns_to_scale = [col for col in x_train.columns if col not in lower_tickers]
55 |         x_train[columns_to_scale] = scaler.fit_transform(x_train[columns_to_scale])
56 |         y_scaler = MinMaxScaler()
57 |         y_train = y_scaler.fit_transform(y_train)
58 |         for j in range(len(x_train) - sequence_length + 1):  # 'j', not 'i': 'i' would shadow the ticker loop variable
59 |             x_lstm.append(x_train.iloc[j:j + sequence_length].to_numpy())
60 |             y_lstm.append(y_train[j + sequence_length - 1, 0])
61 |     x_lstm = np.array(x_lstm)
62 |     y_lstm = np.array(y_lstm)
63 |     return x_lstm, y_lstm, scaler, y_scaler
64 |
65 |
66 | class CustomAttention(Layer):
67 |     def __init__(self, **kwargs):
68 |         super(CustomAttention, self).__init__(**kwargs)
69 |
70 |     def build(self, input_shape):
71 |         self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
72 |                                  initializer="normal")
73 |         self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
74 |                                  initializer="zeros")
75 |         super(CustomAttention, self).build(input_shape)
76 |
77 |     def call(self, x):
78 |         # Applying a simple attention mechanism
79 |         e = K.tanh(K.dot(x, self.W) + self.b)
80 |         e = K.squeeze(e, axis=-1)
81 |         alpha = Softmax(axis=-1)(e)
82 |         alpha = K.expand_dims(alpha, axis=-1)
83 |         context = x * alpha
84 |         context = K.sum(context, axis=1)
85 |         return context
86 |
87 |
88 | def build_model(input_shape: tuple[int, int], unit_number: int) -> Model:
89 |     """
90 |     | Params:\n
91 |     | input_shape - length of LSTM time series and input for each time step\n
92 |     unit_number - number of units in LSTM layer
93 |     """
94 |     inputs = Input(shape=input_shape)
95 |     lstm_out = LSTM(unit_number, return_sequences=True)(inputs)  # Return sequences for attention
96 |     attention_out = CustomAttention()(lstm_out)
97 |     outputs = Dense(1)(attention_out)  # Predicting the next stock price
98 |     model = Model(inputs=inputs, outputs=outputs)
99 |     model.compile(optimizer='adam', loss='mean_squared_error')
100 |     return model
101 |
--------------------------------------------------------------------------------
/airflow/dags/custom_operators/azure_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime, timedelta
3 | import requests
4 | import pandas as pd
5 | import pickle
6 | from azure.storage.blob import BlobServiceClient
7 | from azure.identity import DefaultAzureCredential
8 | from azureml.core.authentication import MsiAuthentication
9 | from azureml.core import Model, Workspace
10 | import mlflow
11 | from mlflow.tracking import MlflowClient
12 |
13 |
14 | def read_blob(account_name: str, container_name: str) -> pd.DataFrame:
15 |     """
16 |     Read blob data from Azure Blob Storage container
17 |     """
18 |     url = f"https://{account_name}.blob.core.windows.net"
19 |     credential = DefaultAzureCredential()
20 |     blob_service_client = BlobServiceClient(account_url=url, credential=credential)
21 |     container_client = blob_service_client.get_container_client(container_name)
22 |
23 |     # Calculate the date 3 months ago
24 |     three_months_ago = datetime.now() - timedelta(days=90)
25 |
26 |     # List to hold all DataFrames
27 |     dataframes = []
28 |
29 |     try:
30 |         blobs = container_client.list_blobs()
31 |         for blob in blobs:
32 |             # Example blob name: 'some_folder_name/yyyy/mm/dd/HH/MM/file_name.pkl'
33 |             parts = blob.name.split('/')
34 |             if len(parts) > 5 and parts[1].isdigit() and parts[2].isdigit() and parts[3].isdigit():
35 |                 # Parse the date from the blob name
36 |                 year = int(parts[1])
37 |                 month = int(parts[2])
38 |                 day = int(parts[3])
39 |                 blob_date = datetime(year, month, day)
40 |
41 |                 # Check if the blob date is within the last three months
42 |                 if blob_date > three_months_ago:
43 |                     blob_client = container_client.get_blob_client(blob)
44 |                     blob_data = blob_client.download_blob().readall()
45 |                     data = pickle.loads(blob_data)
46 |                     if isinstance(data, pd.DataFrame):  # Check if data is a DataFrame
47 |                         dataframes.append(data)
48 |                     else:
49 |                         raise Exception(f"Blob {blob.name} does not contain a DataFrame.")
50 |
51 |         # Concatenate all DataFrames into one
52 |         if dataframes:
53 |             final_df = pd.concat(dataframes, ignore_index=True)
54 |             return final_df
55 |         else:
56 |             raise Exception("No DataFrame blobs found from the last 3 months.")
57 |
58 |     except Exception as e:
59 |         print(f"An error occurred: {e}")
60 |         raise  # re-raise so the Airflow task fails loudly instead of returning None
61 |
62 |
63 | def auth_ws_register_model(model_name: str, tickers: list[str]) -> tuple[Workspace, Model]:
64 |     # Download mlflow production version of trained model
65 |     mlflow.set_tracking_uri("http://10.4.0.4:5000")
66 |     client = MlflowClient()
67 |     prod_version = client.get_model_version_by_alias(model_name, "production")
68 |     model_uri = f"models:/{model_name}/{prod_version.version}"
69 |     local_path = mlflow.artifacts.download_artifacts(model_uri, dst_path=f'{os.getcwd()}/artifacts')
70 |     artifact_uri = f'runs:/{prod_version.run_id}/scaler.joblib'
71 |     _ = mlflow.artifacts.download_artifacts(artifact_uri, dst_path=local_path)
72 |     artifact_uri = f'runs:/{prod_version.run_id}/y_scaler.joblib'
73 |     _ = mlflow.artifacts.download_artifacts(artifact_uri, dst_path=local_path)
74 |     tickers_len = len(tickers)
75 |     # os.path.join instead of string concatenation: local_path is not guaranteed
76 |     # to end with a separator, and score.py expects the file at the model root
77 |     with open(os.path.join(local_path, 'tickers_len.txt'), 'w') as file:
78 |         file.write(str(tickers_len))
79 |
80 |     # Auth to Azure ML Workspace with System-assigned managed identity
81 |     msi_auth = MsiAuthentication()
82 |     subscription_id, resource_group = get_azure_vm_metadata()
83 |     ws = Workspace(subscription_id=subscription_id,
84 |                    resource_group=resource_group,
85 |                    workspace_name="mlserving",
86 |                    auth=msi_auth)
87 |
88 |     # Register model version
89 |     model = Model.register(workspace=ws, model_name=model_name, model_path=local_path)
90 |     return ws, model
91 |
92 |
93 | def get_azure_vm_metadata() -> tuple[str, str]:
94 |     metadata_url = "http://169.254.169.254/metadata/instance?api-version=2021-02-01"
95 |     headers = {
96 |         "Metadata": "true"
97 |     }
98 |     response = requests.get(metadata_url, headers=headers)
99 |     if response.status_code == 200:
100 |         metadata = response.json()
101 |         subscription_id = metadata['compute']['subscriptionId']
102 |         resource_group = metadata['compute']['resourceGroupName']
103 |         return subscription_id, resource_group
104 |     else:
105 |         # fail loudly: callers immediately unpack this as (subscription_id, resource_group)
106 |         raise Exception(f"Failed to retrieve Azure VM metadata: {response.status_code}")
107 |
108 |
109 | tickers = ['AMZN', 'AXP']
110 | _, resource_group = get_azure_vm_metadata()
111 | account_name = resource_group + '3de90'
112 | container_data = "kafkadata"
113 |
--------------------------------------------------------------------------------
/airflow/dags/custom_operators/custom_functions_kafka.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import pandas as pd
4 | from kafka import KafkaConsumer
5 | from azure.identity import DefaultAzureCredential
6 | from azure.storage.blob import BlobServiceClient
7 | from .azure_utils import tickers, account_name, container_data
8 |
9 |
10 | def consume_kafka_task() -> None:
11 |     """
12 |     Airflow task\n
13 |     Consume latest data that Kafka received from Airbyte and save it locally
14 |     """
15 |     # Kafka configuration
16 |     topic_name = 'stock'
17 |     bootstrap_servers = ['10.1.0.4:9092']
18 |     group_id = 'consumer'
19 |
20 |     # Create a KafkaConsumer
21 |     consumer = KafkaConsumer(
22 |         topic_name,
23 |         bootstrap_servers=bootstrap_servers,
24 |         auto_offset_reset='latest',
25 |         enable_auto_commit=True,  # Enable auto commit if processing is simple
26 |         group_id=group_id
27 |     )
28 |
29 |     # Condition to consume data from topic
30 |     consume = True
31 |
32 |     # Init empty DataFrame
33 |     columns = {
34 |         'timestamp': 'datetime64[ns]',
35 |         'close': 'float64',
36 |         'high': 'float64',
37 |         'open': 'float64',
38 |         'low': 'float64',
39 |         'volume': 'int64',
40 |         'symbol': 'object'
41 |     }
42 |     total_df = pd.DataFrame({key: pd.Series(dtype=type_) for key, type_ in columns.items()})
43 |
44 |     # Consume messages
45 |     try:
46 |         while consume:
47 |             for message in consumer.poll(timeout_ms=1000).values():
48 |                 for msg in message:
49 |                     # Decode the message value from bytes to a string
50 |                     message_value = msg.value.decode('utf-8')
51 |
52 |                     # Convert the JSON string to a Python dictionary
53 |                     data = json.loads(message_value)
54 |                     # Process the data
55 |                     chart_result = data['_airbyte_data']['chart']['result'][0]
56 |                     timestamps = chart_result['timestamp']
57 |                     quotes = chart_result['indicators']['quote'][0]
58 |                     symbol = chart_result['meta']['symbol']
59 |
60 |                     # Create a DataFrame from processed data
61 |                     df = pd.DataFrame({
62 |                         'timestamp': pd.to_datetime(timestamps, unit='s'),
63 |                         'low': quotes['low'],
64 |                         'close': quotes['close'],
65 |                         'high': quotes['high'],
66 |                         'open': quotes['open'],
67 |                         'volume': quotes['volume'],
68 |                         'symbol': [symbol] * len(timestamps)
69 |                     })
70 |
71 |                     df[['low', 'close', 'high', 'open']] = df[['low', 'close', 'high', 'open']].round(2)
72 |
73 |                     # Keep the most recent rows and append to the running DataFrame
74 |                     last_row_df = df.iloc[-110:]
75 |                     total_df = pd.concat([total_df, last_row_df], ignore_index=True)
76 |                     consume = not set(tickers).issubset(total_df['symbol'])
77 |
78 |         # consumer.commit()
79 |     finally:
80 |         # Save data locally
81 |         for i in tickers:
82 |             for j in range(110, 2, -1):
83 |                 row = total_df[total_df['symbol'] == i].iloc[-j]
84 |                 if pd.isna(row['close']):
85 |                     continue
86 | else: 87 | date_path_format = row['timestamp'].strftime('%Y/%m/%d/%H/%M/') 88 | folder_path = f"data/{i}/{date_path_format}" 89 | date_name_format = row['timestamp'].strftime('%Y%m%d%H%M') 90 | os.makedirs(folder_path, exist_ok=True) 91 | filename = f"{i}{date_name_format}.pkl" 92 | temp_df = pd.DataFrame(total_df[total_df['symbol'] == i].iloc[-j:-j+1]) 93 | temp_df.to_pickle(folder_path+filename) 94 | consumer.close() 95 | 96 | 97 | def upload_blob_task() -> None: 98 | """ 99 | Airflow task\n 100 | Compare local data with Azure Blob Storage container and upload if necessary 101 | """ 102 | path = 'data/' 103 | url = f"https://{account_name}.blob.core.windows.net" 104 | credential = DefaultAzureCredential() 105 | blob_service_client = BlobServiceClient(account_url=url, credential=credential) 106 | container_client = blob_service_client.get_container_client(container_data) 107 | for subdir, dirs, files in os.walk(path): 108 | for file in files: 109 | file_path = os.path.join(subdir, file) 110 | blob_name = file_path.replace(path, '').replace('\\', '/').strip('/') 111 | blob_client = container_client.get_blob_client(blob_name) 112 | if not blob_client.exists(): 113 | print(f"Uploading new blob: {blob_name}") 114 | with open(file_path, "rb") as data: 115 | blob_client.upload_blob(data) 116 | else: 117 | print(f"Blob already exists, skipping: {blob_name}") 118 | -------------------------------------------------------------------------------- /airflow/dags/custom_operators/custom_functions_model.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | from datetime import datetime 3 | import matplotlib.pyplot as plt 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import mean_squared_error, mean_absolute_error 6 | import mlflow 7 | import mlflow.tensorflow 8 | from mlflow.models import infer_signature 9 | from azureml.core import Model, Environment 10 | from azureml.core.model import InferenceConfig 11 | from azureml.core.webservice import AksWebservice 12 | from azureml.core.compute import AksCompute, ComputeTarget 13 | from azureml.core.conda_dependencies import CondaDependencies 14 | from .azure_utils import tickers, read_blob, auth_ws_register_model, account_name, container_data 15 | from .model_utils import preprocess_data, build_model, model_name 16 | from .mlflow_utils import compare_and_update_production_stage 17 | 18 | 19 | def train_model_task() -> str: 20 | """ 21 | Airflow task\n 22 | Train new version of model, log it to mlflow model registry with new run of experiment,\n 23 | | compare with model currently assigned to production and change assignment if new version is better 24 | """ 25 | mlflow.set_tracking_uri("http://10.4.0.4:5000") 26 | mlflow.set_experiment(model_name) 27 | mlflow.set_tag("mlflow.runName", f"{datetime.now()}".replace(' ', 'T')) 28 | unit_number = 50 29 | 30 | model = build_model((32, 12), unit_number) 31 | x, y, scaler, y_scaler = preprocess_data(32, read_blob, tickers, account_name, container_data) 32 | x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42) 33 | history = model.fit(x_train, y_train, epochs=30, batch_size=16, validation_data=(x_val, y_val)) 34 | 35 | plt.figure(figsize=(10, 5)) 36 | plt.plot(history.history['loss'], label='Train Loss') 37 | plt.plot(history.history['val_loss'], label='Validation Loss') 38 | plt.title('Model Loss Over Epochs') 39 | plt.ylabel('Loss') 40 | plt.xlabel('Epoch') 41 | plt.legend(loc='upper 
right') 42 | plt.savefig("loss_plot.png") 43 | 44 | predictions = model.predict(x_val) 45 | 46 | signature = infer_signature(x_val, predictions) 47 | mlflow.tensorflow.log_model( 48 | model, 49 | artifact_path="tfmodel", 50 | signature=signature, 51 | registered_model_name=model_name 52 | ) 53 | 54 | mse = mean_squared_error(y_val, predictions) 55 | mae = mean_absolute_error(y_val, predictions) 56 | joblib.dump(scaler, "scaler.joblib") 57 | joblib.dump(y_scaler, "y_scaler.joblib") 58 | mlflow.log_param("unit_number", unit_number) 59 | mlflow.log_metric("Mean Squared Error", mse) 60 | mlflow.log_metric("Mean Absolute Error", mae) 61 | mlflow.log_artifact("loss_plot.png") 62 | mlflow.log_artifact("scaler.joblib") 63 | mlflow.log_artifact("y_scaler.joblib") 64 | 65 | return compare_and_update_production_stage(model_name, mse, x_val, y_val) 66 | 67 | 68 | def deploy_azureml_task(**kwargs) -> None: 69 | """ 70 | Airflow task\n 71 | | Deploy mlflow production version of trained model to Azure ML real-time inference endpoint using AKS\n 72 | Invoke only if new version is set to production in train_model_task 73 | """ 74 | ti = kwargs['ti'] 75 | controller = ti.xcom_pull(task_ids='train_model_task') 76 | if controller == 'old_version': 77 | pass  # Keep the current production deployment 78 | elif controller in ('new_model', 'new_version'): 79 | # Auth to Azure ML Workspace and register production version of trained model 80 | ws, model = auth_ws_register_model(model_name, tickers) 81 | 82 | # Create conda environment for score.py 83 | env = Environment('my-env') 84 | cd = CondaDependencies.create(pip_packages=['mlflow==2.12.1', 'azureml-defaults', 'numpy==1.23.5', 85 | 'scikit-learn==1.4.2', 'tensorflow==2.15.1'], 86 | python_version='3.10') 87 | env.python.conda_dependencies = cd 88 | 89 | # Create an inference configuration 90 | inference_config = InferenceConfig(entry_script='score.py', environment=env) 91 | 92 | # Create if not exists and set AKS deployment configuration 93 | deployment_config = AksWebservice.deploy_configuration(cpu_cores=1, memory_gb=1) 94 | try: 95 | aks_target = ws.compute_targets['lstm-aks-cluster'] 96 | except KeyError: 97 | prov_config = AksCompute.provisioning_configuration(location='polandcentral') 98 | aks_target = ComputeTarget.create(workspace=ws, name='lstm-aks-cluster', provisioning_configuration=prov_config) 99 | aks_target.wait_for_completion(show_output=True) 100 | 101 | # Deploy model to AKS target 102 | service = Model.deploy(workspace=ws, 103 | name='lstm-service', 104 | models=[model], 105 | inference_config=inference_config, 106 | deployment_config=deployment_config, 107 | deployment_target=aks_target, 108 | overwrite=True) 109 | service.wait_for_deployment(show_output=True) 110 | else: 111 | raise Exception("train_model_task returned an unexpected value") 112 | -------------------------------------------------------------------------------- /arm_templates/blob_storage/temp_blob_storage_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "storageAccountName": { 6 | "type": "String" 7 | }, 8 | "containerName": { 9 | "type": "String" 10 | } 11 | }, 12 | "variables": {}, 13 | "resources": [ 14 | { 15 | "type": "Microsoft.Storage/storageAccounts", 16 | "apiVersion": "2023-04-01", 17 | "name": "[parameters('storageAccountName')]", 18 | "location": "[resourceGroup().location]", 19 | "sku": { 20 | "name":
"Standard_LRS", 21 | "tier": "Standard" 22 | }, 23 | "kind": "StorageV2", 24 | "properties": { 25 | "defaultToOAuthAuthentication": false, 26 | "allowCrossTenantReplication": false, 27 | "minimumTlsVersion": "TLS1_0", 28 | "allowBlobPublicAccess": true, 29 | "allowSharedKeyAccess": true, 30 | "networkAcls": { 31 | "bypass": "AzureServices", 32 | "virtualNetworkRules": [], 33 | "ipRules": [], 34 | "defaultAction": "Allow" 35 | }, 36 | "supportsHttpsTrafficOnly": true, 37 | "encryption": { 38 | "services": { 39 | "file": { 40 | "keyType": "Account", 41 | "enabled": true 42 | }, 43 | "blob": { 44 | "keyType": "Account", 45 | "enabled": true 46 | } 47 | }, 48 | "keySource": "Microsoft.Storage" 49 | }, 50 | "accessTier": "Hot" 51 | } 52 | }, 53 | { 54 | "type": "Microsoft.Storage/storageAccounts/blobServices", 55 | "apiVersion": "2023-04-01", 56 | "name": "[concat(parameters('storageAccountName'), '/default')]", 57 | "dependsOn": [ 58 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 59 | ], 60 | "sku": { 61 | "name": "Standard_LRS", 62 | "tier": "Standard" 63 | }, 64 | "properties": { 65 | "cors": { 66 | "corsRules": [] 67 | }, 68 | "deleteRetentionPolicy": { 69 | "allowPermanentDelete": false, 70 | "enabled": false 71 | } 72 | } 73 | }, 74 | { 75 | "type": "Microsoft.Storage/storageAccounts/fileServices", 76 | "apiVersion": "2023-04-01", 77 | "name": "[concat(parameters('storageAccountName'), '/default')]", 78 | "dependsOn": [ 79 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 80 | ], 81 | "sku": { 82 | "name": "Standard_LRS", 83 | "tier": "Standard" 84 | }, 85 | "properties": { 86 | "protocolSettings": { 87 | "smb": {} 88 | }, 89 | "cors": { 90 | "corsRules": [] 91 | }, 92 | "shareDeleteRetentionPolicy": { 93 | "enabled": true, 94 | "days": 7 95 | } 96 | } 97 | }, 98 | { 99 | "type": "Microsoft.Storage/storageAccounts/queueServices", 100 | "apiVersion": "2023-04-01", 101 | "name": "[concat(parameters('storageAccountName'), '/default')]", 102 | "dependsOn": [ 103 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 104 | ], 105 | "properties": { 106 | "cors": { 107 | "corsRules": [] 108 | } 109 | } 110 | }, 111 | { 112 | "type": "Microsoft.Storage/storageAccounts/tableServices", 113 | "apiVersion": "2023-04-01", 114 | "name": "[concat(parameters('storageAccountName'), '/default')]", 115 | "dependsOn": [ 116 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 117 | ], 118 | "properties": { 119 | "cors": { 120 | "corsRules": [] 121 | } 122 | } 123 | }, 124 | { 125 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 126 | "apiVersion": "2023-04-01", 127 | "name": "[concat(parameters('storageAccountName'), '/default/', parameters('containerName'))]", 128 | "dependsOn": [ 129 | "[resourceId('Microsoft.Storage/storageAccounts/blobServices', parameters('storageAccountName'), 'default')]", 130 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 131 | ], 132 | "properties": { 133 | "immutableStorageWithVersioning": { 134 | "enabled": false 135 | }, 136 | "defaultEncryptionScope": "$account-encryption-key", 137 | "denyEncryptionScopeOverride": false, 138 | "publicAccess": "Blob" 139 | } 140 | } 141 | ] 142 | } 143 | -------------------------------------------------------------------------------- /arm_templates/kafka_infra/kafka_infra_template.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the virtual machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the virtual machine." 15 | } 16 | }, 17 | "tempStorageAccountName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Temporary storage for deployment files." 21 | } 22 | }, 23 | "tempContainerName": { 24 | "type": "string" 25 | } 26 | }, 27 | "variables": { 28 | "virtualNetworkName": "kafka-vnet", 29 | "subnetName": "default", 30 | "networkInterfaceName": "kafka-nic", 31 | "networkSecurityGroupName": "kafka-nsg", 32 | "virtualMachineName": "kafka", 33 | "vmSize": "Standard_E2_v4", 34 | "imagePublisher": "canonical", 35 | "imageOffer": "0001-com-ubuntu-server-focal", 36 | "imageSKU": "20_04-lts-gen2", 37 | "location": "[resourceGroup().location]" 38 | }, 39 | "resources": [ 40 | { 41 | "type": "Microsoft.Network/virtualNetworks", 42 | "apiVersion": "2020-06-01", 43 | "name": "[variables('virtualNetworkName')]", 44 | "location": "[variables('location')]", 45 | "properties": { 46 | "addressSpace": { 47 | "addressPrefixes": ["10.1.0.0/16"] 48 | }, 49 | "subnets": [ 50 | { 51 | "name": "[variables('subnetName')]", 52 | "properties": { 53 | "addressPrefix": "10.1.0.0/24" 54 | } 55 | } 56 | ] 57 | } 58 | }, 59 | { 60 | "type": "Microsoft.Network/networkSecurityGroups", 61 | "apiVersion": "2020-06-01", 62 | "name": "[variables('networkSecurityGroupName')]", 63 | "location": "[variables('location')]", 64 | "properties": { 65 | "securityRules": [] 66 | } 67 | }, 68 | { 69 | "type": "Microsoft.Network/networkInterfaces", 70 | "apiVersion": "2020-06-01", 71 | "name": "[variables('networkInterfaceName')]", 72 | "location": "[variables('location')]", 73 | "dependsOn": [ 74 | "[resourceId('Microsoft.Network/virtualNetworks', variables('virtualNetworkName'))]", 75 | "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 76 | ], 77 | "properties": { 78 | "ipConfigurations": [ 79 | { 80 | "name": "ipconfig1", 81 | "properties": { 82 | "subnet": { 83 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('virtualNetworkName'), variables('subnetName'))]" 84 | }, 85 | "privateIPAllocationMethod": "Dynamic" 86 | } 87 | } 88 | ], 89 | "networkSecurityGroup": { 90 | "id": "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 91 | } 92 | } 93 | }, 94 | { 95 | "type": "Microsoft.Compute/virtualMachines", 96 | "apiVersion": "2020-06-01", 97 | "name": "[variables('virtualMachineName')]", 98 | "location": "[variables('location')]", 99 | "dependsOn": [ 100 | "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 101 | ], 102 | "properties": { 103 | "hardwareProfile": { 104 | "vmSize": "[variables('vmSize')]" 105 | }, 106 | "storageProfile": { 107 | "imageReference": { 108 | "publisher": "[variables('imagePublisher')]", 109 | "offer": "[variables('imageOffer')]", 110 | "sku": "[variables('imageSKU')]", 111 | "version": "latest" 112 | }, 113 | "osDisk": { 114 | "createOption": "FromImage" 115 | } 116 | }, 117 | "osProfile": { 118 | "computerName": "[variables('virtualMachineName')]", 119 | 
"adminUsername": "[parameters('adminUsername')]", 120 | "adminPassword": "[parameters('adminPassword')]", 121 | "linuxConfiguration": { 122 | "disablePasswordAuthentication": false 123 | } 124 | }, 125 | "networkProfile": { 126 | "networkInterfaces": [ 127 | { 128 | "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 129 | } 130 | ] 131 | } 132 | }, 133 | "identity": { 134 | "type": "SystemAssigned" 135 | } 136 | }, 137 | { 138 | "type": "Microsoft.Compute/virtualMachines/extensions", 139 | "name": "[concat(variables('virtualMachineName'), '/config-app')]", 140 | "location": "[variables('location')]", 141 | "apiVersion": "2018-06-01", 142 | "dependsOn": [ 143 | "[resourceId('Microsoft.Compute/virtualMachines', variables('virtualMachineName'))]" 144 | ], 145 | "properties": { 146 | "publisher": "Microsoft.Azure.Extensions", 147 | "type": "CustomScript", 148 | "typeHandlerVersion": "2.1", 149 | "autoUpgradeMinorVersion": true, 150 | "settings": { 151 | "commandToExecute": "[concat('bash -c \"export HOME=/home/', parameters('adminUsername'), ' && cd $HOME && wget https://', parameters('tempStorageAccountName'), '.blob.core.windows.net/', parameters('tempContainerName'), '/kafka.tar.gz -O kafka.tar.gz && tar -xzvf kafka.tar.gz && cd kafka && chmod +x setup.sh && ./setup.sh &\"')]" 152 | } 153 | } 154 | } 155 | ], 156 | "outputs": {} 157 | } 158 | -------------------------------------------------------------------------------- /arm_templates/mlflow_infra/mlflow_infra_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the virtual machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the virtual machine." 15 | } 16 | }, 17 | "tempStorageAccountName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Temporary storage for deployment files." 
21 | } 22 | }, 23 | "tempContainerName": { 24 | "type": "string" 25 | } 26 | }, 27 | "variables": { 28 | "virtualNetworkName": "mlflow-vnet", 29 | "subnetName": "default", 30 | "networkInterfaceName": "mlflow-nic", 31 | "networkSecurityGroupName": "mlflow-nsg", 32 | "virtualMachineName": "mlflow", 33 | "vmSize": "Standard_E2_v4", 34 | "imagePublisher": "canonical", 35 | "imageOffer": "0001-com-ubuntu-server-focal", 36 | "imageSKU": "20_04-lts-gen2", 37 | "location": "[resourceGroup().location]" 38 | }, 39 | "resources": [ 40 | { 41 | "type": "Microsoft.Network/virtualNetworks", 42 | "apiVersion": "2020-06-01", 43 | "name": "[variables('virtualNetworkName')]", 44 | "location": "[variables('location')]", 45 | "properties": { 46 | "addressSpace": { 47 | "addressPrefixes": ["10.4.0.0/16"] 48 | }, 49 | "subnets": [ 50 | { 51 | "name": "[variables('subnetName')]", 52 | "properties": { 53 | "addressPrefix": "10.4.0.0/24" 54 | } 55 | } 56 | ] 57 | } 58 | }, 59 | { 60 | "type": "Microsoft.Network/networkSecurityGroups", 61 | "apiVersion": "2020-06-01", 62 | "name": "[variables('networkSecurityGroupName')]", 63 | "location": "[variables('location')]", 64 | "properties": { 65 | "securityRules": [] 66 | } 67 | }, 68 | { 69 | "type": "Microsoft.Network/networkInterfaces", 70 | "apiVersion": "2020-06-01", 71 | "name": "[variables('networkInterfaceName')]", 72 | "location": "[variables('location')]", 73 | "dependsOn": [ 74 | "[resourceId('Microsoft.Network/virtualNetworks', variables('virtualNetworkName'))]", 75 | "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 76 | ], 77 | "properties": { 78 | "ipConfigurations": [ 79 | { 80 | "name": "ipconfig1", 81 | "properties": { 82 | "subnet": { 83 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('virtualNetworkName'), variables('subnetName'))]" 84 | }, 85 | "privateIPAllocationMethod": "Dynamic" 86 | } 87 | } 88 | ], 89 | "networkSecurityGroup": { 90 | "id": "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 91 | } 92 | } 93 | }, 94 | { 95 | "type": "Microsoft.Compute/virtualMachines", 96 | "apiVersion": "2020-06-01", 97 | "name": "[variables('virtualMachineName')]", 98 | "location": "[variables('location')]", 99 | "dependsOn": [ 100 | "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 101 | ], 102 | "properties": { 103 | "hardwareProfile": { 104 | "vmSize": "[variables('vmSize')]" 105 | }, 106 | "storageProfile": { 107 | "imageReference": { 108 | "publisher": "[variables('imagePublisher')]", 109 | "offer": "[variables('imageOffer')]", 110 | "sku": "[variables('imageSKU')]", 111 | "version": "latest" 112 | }, 113 | "osDisk": { 114 | "createOption": "FromImage" 115 | } 116 | }, 117 | "osProfile": { 118 | "computerName": "[variables('virtualMachineName')]", 119 | "adminUsername": "[parameters('adminUsername')]", 120 | "adminPassword": "[parameters('adminPassword')]", 121 | "linuxConfiguration": { 122 | "disablePasswordAuthentication": false 123 | } 124 | }, 125 | "networkProfile": { 126 | "networkInterfaces": [ 127 | { 128 | "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 129 | } 130 | ] 131 | } 132 | }, 133 | "identity": { 134 | "type": "SystemAssigned" 135 | } 136 | }, 137 | { 138 | "type": "Microsoft.Compute/virtualMachines/extensions", 139 | "name": "[concat(variables('virtualMachineName'), '/config-app')]", 140 | "location": "[variables('location')]", 141 | 
"apiVersion": "2018-06-01", 142 | "dependsOn": [ 143 | "[resourceId('Microsoft.Compute/virtualMachines', variables('virtualMachineName'))]" 144 | ], 145 | "properties": { 146 | "publisher": "Microsoft.Azure.Extensions", 147 | "type": "CustomScript", 148 | "typeHandlerVersion": "2.1", 149 | "autoUpgradeMinorVersion": true, 150 | "settings": { 151 | "commandToExecute": "[concat('bash -c \"export HOME=/home/', parameters('adminUsername'), ' && cd $HOME && wget https://', parameters('tempStorageAccountName'), '.blob.core.windows.net/', parameters('tempContainerName'), '/mlflow.tar.gz -O mlflow.tar.gz && tar -xzvf mlflow.tar.gz && cd mlflow && chmod +x setup.sh && ./setup.sh &\"')]" 152 | } 153 | } 154 | } 155 | ], 156 | "outputs": {} 157 | } 158 | -------------------------------------------------------------------------------- /arm_templates/airbyte_infra/airbyte_infra_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the virtual machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the virtual machine." 15 | } 16 | }, 17 | "tempStorageAccountName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Temporary storage for deployment files." 21 | } 22 | }, 23 | "tempContainerName": { 24 | "type": "string" 25 | } 26 | }, 27 | "variables": { 28 | "virtualNetworkName": "airbyte-vnet", 29 | "subnetName": "default", 30 | "networkInterfaceName": "airbyte-nic", 31 | "networkSecurityGroupName": "airbyte-nsg", 32 | "virtualMachineName": "airbyte", 33 | "vmSize": "Standard_E2_v4", 34 | "imagePublisher": "canonical", 35 | "imageOffer": "0001-com-ubuntu-server-focal", 36 | "imageSKU": "20_04-lts-gen2", 37 | "location": "[resourceGroup().location]" 38 | }, 39 | "resources": [ 40 | { 41 | "type": "Microsoft.Network/virtualNetworks", 42 | "apiVersion": "2020-06-01", 43 | "name": "[variables('virtualNetworkName')]", 44 | "location": "[variables('location')]", 45 | "properties": { 46 | "addressSpace": { 47 | "addressPrefixes": ["10.2.0.0/16"] 48 | }, 49 | "subnets": [ 50 | { 51 | "name": "[variables('subnetName')]", 52 | "properties": { 53 | "addressPrefix": "10.2.0.0/24" 54 | } 55 | } 56 | ] 57 | } 58 | }, 59 | { 60 | "type": "Microsoft.Network/networkSecurityGroups", 61 | "apiVersion": "2020-06-01", 62 | "name": "[variables('networkSecurityGroupName')]", 63 | "location": "[variables('location')]", 64 | "properties": { 65 | "securityRules": [] 66 | } 67 | }, 68 | { 69 | "type": "Microsoft.Network/networkInterfaces", 70 | "apiVersion": "2020-06-01", 71 | "name": "[variables('networkInterfaceName')]", 72 | "location": "[variables('location')]", 73 | "dependsOn": [ 74 | "[resourceId('Microsoft.Network/virtualNetworks', variables('virtualNetworkName'))]", 75 | "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 76 | ], 77 | "properties": { 78 | "ipConfigurations": [ 79 | { 80 | "name": "ipconfig1", 81 | "properties": { 82 | "subnet": { 83 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('virtualNetworkName'), variables('subnetName'))]" 84 | }, 85 | "privateIPAllocationMethod": "Dynamic" 86 | } 87 | } 88 | ], 89 | "networkSecurityGroup": { 90 | "id": 
"[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 91 | } 92 | } 93 | }, 94 | { 95 | "type": "Microsoft.Compute/virtualMachines", 96 | "apiVersion": "2020-06-01", 97 | "name": "[variables('virtualMachineName')]", 98 | "location": "[variables('location')]", 99 | "dependsOn": [ 100 | "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 101 | ], 102 | "properties": { 103 | "hardwareProfile": { 104 | "vmSize": "[variables('vmSize')]" 105 | }, 106 | "storageProfile": { 107 | "imageReference": { 108 | "publisher": "[variables('imagePublisher')]", 109 | "offer": "[variables('imageOffer')]", 110 | "sku": "[variables('imageSKU')]", 111 | "version": "latest" 112 | }, 113 | "osDisk": { 114 | "createOption": "FromImage" 115 | } 116 | }, 117 | "osProfile": { 118 | "computerName": "[variables('virtualMachineName')]", 119 | "adminUsername": "[parameters('adminUsername')]", 120 | "adminPassword": "[parameters('adminPassword')]", 121 | "linuxConfiguration": { 122 | "disablePasswordAuthentication": false 123 | } 124 | }, 125 | "networkProfile": { 126 | "networkInterfaces": [ 127 | { 128 | "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 129 | } 130 | ] 131 | } 132 | }, 133 | "identity": { 134 | "type": "SystemAssigned" 135 | } 136 | }, 137 | { 138 | "type": "Microsoft.Compute/virtualMachines/extensions", 139 | "name": "[concat(variables('virtualMachineName'), '/config-app')]", 140 | "location": "[variables('location')]", 141 | "apiVersion": "2018-06-01", 142 | "dependsOn": [ 143 | "[resourceId('Microsoft.Compute/virtualMachines', variables('virtualMachineName'))]" 144 | ], 145 | "properties": { 146 | "publisher": "Microsoft.Azure.Extensions", 147 | "type": "CustomScript", 148 | "typeHandlerVersion": "2.1", 149 | "autoUpgradeMinorVersion": true, 150 | "settings": { 151 | "commandToExecute": "[concat('bash -c \"export HOME=/home/', parameters('adminUsername'), ' && cd $HOME && wget https://', parameters('tempStorageAccountName'), '.blob.core.windows.net/', parameters('tempContainerName'), '/airbyte.tar.gz -O airbyte.tar.gz && tar -xzvf airbyte.tar.gz && cd airbyte && chmod +x setup.sh && ./setup.sh &\"')]" 152 | } 153 | } 154 | } 155 | ], 156 | "outputs": {} 157 | } 158 | -------------------------------------------------------------------------------- /arm_templates/airflow_infra/airflow_infra_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the virtual machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the virtual machine." 15 | } 16 | }, 17 | "tempStorageAccountName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Temporary storage for deployment files." 
21 | } 22 | }, 23 | "tempContainerName": { 24 | "type": "string" 25 | } 26 | }, 27 | "variables": { 28 | "virtualNetworkName": "airflow-vnet", 29 | "subnetName": "default", 30 | "networkInterfaceName": "airflow-nic", 31 | "networkSecurityGroupName": "airflow-nsg", 32 | "virtualMachineName": "airflow", 33 | "vmSize": "Standard_E2_v4", 34 | "imagePublisher": "canonical", 35 | "imageOffer": "0001-com-ubuntu-server-focal", 36 | "imageSKU": "20_04-lts-gen2", 37 | "location": "[resourceGroup().location]" 38 | }, 39 | "resources": [ 40 | { 41 | "type": "Microsoft.Network/virtualNetworks", 42 | "apiVersion": "2020-06-01", 43 | "name": "[variables('virtualNetworkName')]", 44 | "location": "[variables('location')]", 45 | "properties": { 46 | "addressSpace": { 47 | "addressPrefixes": ["10.3.0.0/16"] 48 | }, 49 | "subnets": [ 50 | { 51 | "name": "[variables('subnetName')]", 52 | "properties": { 53 | "addressPrefix": "10.3.0.0/24" 54 | } 55 | } 56 | ] 57 | } 58 | }, 59 | { 60 | "type": "Microsoft.Network/networkSecurityGroups", 61 | "apiVersion": "2020-06-01", 62 | "name": "[variables('networkSecurityGroupName')]", 63 | "location": "[variables('location')]", 64 | "properties": { 65 | "securityRules": [] 66 | } 67 | }, 68 | { 69 | "type": "Microsoft.Network/networkInterfaces", 70 | "apiVersion": "2020-06-01", 71 | "name": "[variables('networkInterfaceName')]", 72 | "location": "[variables('location')]", 73 | "dependsOn": [ 74 | "[resourceId('Microsoft.Network/virtualNetworks', variables('virtualNetworkName'))]", 75 | "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 76 | ], 77 | "properties": { 78 | "ipConfigurations": [ 79 | { 80 | "name": "ipconfig1", 81 | "properties": { 82 | "subnet": { 83 | "id": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('virtualNetworkName'), variables('subnetName'))]" 84 | }, 85 | "privateIPAllocationMethod": "Dynamic" 86 | } 87 | } 88 | ], 89 | "networkSecurityGroup": { 90 | "id": "[resourceId('Microsoft.Network/networkSecurityGroups', variables('networkSecurityGroupName'))]" 91 | } 92 | } 93 | }, 94 | { 95 | "type": "Microsoft.Compute/virtualMachines", 96 | "apiVersion": "2020-06-01", 97 | "name": "[variables('virtualMachineName')]", 98 | "location": "[variables('location')]", 99 | "dependsOn": [ 100 | "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 101 | ], 102 | "properties": { 103 | "hardwareProfile": { 104 | "vmSize": "[variables('vmSize')]" 105 | }, 106 | "storageProfile": { 107 | "imageReference": { 108 | "publisher": "[variables('imagePublisher')]", 109 | "offer": "[variables('imageOffer')]", 110 | "sku": "[variables('imageSKU')]", 111 | "version": "latest" 112 | }, 113 | "osDisk": { 114 | "createOption": "FromImage" 115 | } 116 | }, 117 | "osProfile": { 118 | "computerName": "[variables('virtualMachineName')]", 119 | "adminUsername": "[parameters('adminUsername')]", 120 | "adminPassword": "[parameters('adminPassword')]", 121 | "linuxConfiguration": { 122 | "disablePasswordAuthentication": false 123 | } 124 | }, 125 | "networkProfile": { 126 | "networkInterfaces": [ 127 | { 128 | "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]" 129 | } 130 | ] 131 | } 132 | }, 133 | "identity": { 134 | "type": "SystemAssigned" 135 | } 136 | }, 137 | { 138 | "type": "Microsoft.Compute/virtualMachines/extensions", 139 | "name": "[concat(variables('virtualMachineName'), '/config-app')]", 140 | "location": "[variables('location')]", 
141 | "apiVersion": "2018-06-01", 142 | "dependsOn": [ 143 | "[resourceId('Microsoft.Compute/virtualMachines', variables('virtualMachineName'))]" 144 | ], 145 | "properties": { 146 | "publisher": "Microsoft.Azure.Extensions", 147 | "type": "CustomScript", 148 | "typeHandlerVersion": "2.1", 149 | "autoUpgradeMinorVersion": true, 150 | "settings": { 151 | "commandToExecute": "[concat('bash -c \"export HOME=/home/', parameters('adminUsername'), ' && cd $HOME && wget https://', parameters('tempStorageAccountName'), '.blob.core.windows.net/', parameters('tempContainerName'), '/airflow.tar.gz -O airflow.tar.gz && tar -xzvf airflow.tar.gz && cd airflow && chmod +x setup.sh && ./setup.sh &\"')]" 152 | } 153 | } 154 | } 155 | ], 156 | "outputs": {} 157 | } 158 | -------------------------------------------------------------------------------- /airflow/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | x-airflow-common: 2 | &airflow-common 3 | # image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.9.0} 4 | build: . 5 | environment: 6 | &airflow-common-env 7 | AIRFLOW__CORE__EXECUTOR: CeleryExecutor 8 | AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 9 | AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow 10 | AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 11 | AIRFLOW__CORE__FERNET_KEY: '' 12 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' 13 | AIRFLOW__CORE__LOAD_EXAMPLES: 'true' 14 | AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' 15 | AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' 16 | _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} 17 | volumes: 18 | - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags 19 | - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs 20 | - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config 21 | - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins 22 | user: "${AIRFLOW_UID:-50000}:0" 23 | depends_on: 24 | &airflow-common-depends-on 25 | redis: 26 | condition: service_healthy 27 | postgres: 28 | condition: service_healthy 29 | 30 | services: 31 | postgres: 32 | image: postgres:13 33 | environment: 34 | POSTGRES_USER: airflow 35 | POSTGRES_PASSWORD: airflow 36 | POSTGRES_DB: airflow 37 | volumes: 38 | - postgres-db-volume:/var/lib/postgresql/data 39 | healthcheck: 40 | test: ["CMD", "pg_isready", "-U", "airflow"] 41 | interval: 10s 42 | retries: 5 43 | start_period: 5s 44 | restart: always 45 | 46 | redis: 47 | image: redis:latest 48 | expose: 49 | - 6379 50 | healthcheck: 51 | test: ["CMD", "redis-cli", "ping"] 52 | interval: 10s 53 | timeout: 30s 54 | retries: 50 55 | start_period: 30s 56 | restart: always 57 | 58 | airflow-webserver: 59 | <<: *airflow-common 60 | command: webserver 61 | ports: 62 | - "8080:8080" 63 | healthcheck: 64 | test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] 65 | interval: 30s 66 | timeout: 10s 67 | retries: 5 68 | start_period: 30s 69 | restart: always 70 | depends_on: 71 | <<: *airflow-common-depends-on 72 | airflow-init: 73 | condition: service_completed_successfully 74 | 75 | airflow-scheduler: 76 | <<: *airflow-common 77 | command: scheduler 78 | healthcheck: 79 | test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] 80 | interval: 30s 81 | timeout: 10s 82 | retries: 5 83 | start_period: 30s 84 | restart: always 85 | depends_on: 86 | <<: *airflow-common-depends-on 87 | airflow-init: 88 | condition: service_completed_successfully 
89 | 90 | airflow-worker: 91 | <<: *airflow-common 92 | command: celery worker 93 | healthcheck: 94 | # yamllint disable rule:line-length 95 | test: 96 | - "CMD-SHELL" 97 | - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' 98 | interval: 30s 99 | timeout: 10s 100 | retries: 5 101 | start_period: 30s 102 | environment: 103 | <<: *airflow-common-env 104 | # Required to handle warm shutdown of the celery workers properly 105 | # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation 106 | DUMB_INIT_SETSID: "0" 107 | restart: always 108 | depends_on: 109 | <<: *airflow-common-depends-on 110 | airflow-init: 111 | condition: service_completed_successfully 112 | 113 | airflow-triggerer: 114 | <<: *airflow-common 115 | command: triggerer 116 | healthcheck: 117 | test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] 118 | interval: 30s 119 | timeout: 10s 120 | retries: 5 121 | start_period: 30s 122 | restart: always 123 | depends_on: 124 | <<: *airflow-common-depends-on 125 | airflow-init: 126 | condition: service_completed_successfully 127 | 128 | airflow-init: 129 | <<: *airflow-common 130 | entrypoint: /bin/bash 131 | # yamllint disable rule:line-length 132 | command: 133 | - -c 134 | - | 135 | if [[ -z "${AIRFLOW_UID}" ]]; then 136 | echo 137 | echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" 138 | echo "If you are on Linux, you SHOULD follow the instructions below to set " 139 | echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." 140 | echo "For other operating systems you can get rid of the warning with manually created .env file:" 141 | echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" 142 | echo 143 | fi 144 | one_meg=1048576 145 | mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) 146 | cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) 147 | disk_available=$$(df / | tail -1 | awk '{print $$4}') 148 | warning_resources="false" 149 | if (( mem_available < 4000 )) ; then 150 | echo 151 | echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" 152 | echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" 153 | echo 154 | warning_resources="true" 155 | fi 156 | if (( cpus_available < 2 )); then 157 | echo 158 | echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" 159 | echo "At least 2 CPUs recommended. You have $${cpus_available}" 160 | echo 161 | warning_resources="true" 162 | fi 163 | if (( disk_available < one_meg * 10 )); then 164 | echo 165 | echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" 166 | echo "At least 10 GBs recommended. 
You have $$(numfmt --to iec $$((disk_available * 1024 )))" 167 | echo 168 | warning_resources="true" 169 | fi 170 | if [[ $${warning_resources} == "true" ]]; then 171 | echo 172 | echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" 173 | echo "Please follow the instructions to increase amount of resources available:" 174 | echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" 175 | echo 176 | fi 177 | mkdir -p /sources/logs /sources/dags /sources/plugins 178 | chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} 179 | exec /entrypoint airflow version 180 | # yamllint enable rule:line-length 181 | environment: 182 | <<: *airflow-common-env 183 | _AIRFLOW_DB_MIGRATE: 'true' 184 | _AIRFLOW_WWW_USER_CREATE: 'true' 185 | _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} 186 | _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} 187 | _PIP_ADDITIONAL_REQUIREMENTS: '' 188 | user: "0:0" 189 | volumes: 190 | - ${AIRFLOW_PROJ_DIR:-.}:/sources 191 | 192 | airflow-cli: 193 | <<: *airflow-common 194 | profiles: 195 | - debug 196 | environment: 197 | <<: *airflow-common-env 198 | CONNECTION_CHECK_MAX_COUNT: "0" 199 | command: 200 | - bash 201 | - -c 202 | - airflow 203 | 204 | flower: 205 | <<: *airflow-common 206 | command: celery flower 207 | profiles: 208 | - flower 209 | ports: 210 | - "5555:5555" 211 | healthcheck: 212 | test: ["CMD", "curl", "--fail", "http://localhost:5555/"] 213 | interval: 30s 214 | timeout: 10s 215 | retries: 5 216 | start_period: 30s 217 | restart: always 218 | depends_on: 219 | <<: *airflow-common-depends-on 220 | airflow-init: 221 | condition: service_completed_successfully 222 | 223 | volumes: 224 | postgres-db-volume: 225 | -------------------------------------------------------------------------------- /arm_templates/airflow_infra/azureml_private_link_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "mlworkspaceId": { 6 | "type": "String" 7 | } 8 | }, 9 | "variables": { 10 | "resourceGroup": "[resourceGroup().name]", 11 | "subscriptionId": "[subscription().subscriptionId]", 12 | "location": "[resourceGroup().location]", 13 | "vNetName": "airflow-vnet", 14 | "privateDnsZones_privatelink_api_azureml_ms_name": "privatelink.api.azureml.ms", 15 | "privateDnsZones_privatelink_notebooks_azure_net_name": "privatelink.notebooks.azure.net", 16 | "virtualNetworks_airflow_vnet_externalid": "[concat('/subscriptions/', variables('subscriptionId'), '/resourceGroups/', variables('resourceGroup'), '/providers/Microsoft.Network/virtualNetworks/', variables('vNetName'))]" 17 | }, 18 | "resources": [ 19 | { 20 | "type": "Microsoft.Network/privateDnsZones", 21 | "apiVersion": "2018-09-01", 22 | "name": "[variables('privateDnsZones_privatelink_api_azureml_ms_name')]", 23 | "location": "global", 24 | "properties": { 25 | "maxNumberOfRecordSets": 25000, 26 | "maxNumberOfVirtualNetworkLinks": 1000, 27 | "maxNumberOfVirtualNetworkLinksWithRegistration": 100, 28 | "numberOfRecordSets": 4, 29 | "numberOfVirtualNetworkLinks": 1, 30 | "numberOfVirtualNetworkLinksWithRegistration": 0, 31 | "provisioningState": "Succeeded" 32 | } 33 | }, 34 | { 35 | "type": "Microsoft.Network/privateDnsZones", 36 | "apiVersion": "2018-09-01", 37 | "name": 
"[variables('privateDnsZones_privatelink_notebooks_azure_net_name')]", 38 | "location": "global", 39 | "properties": { 40 | "maxNumberOfRecordSets": 25000, 41 | "maxNumberOfVirtualNetworkLinks": 1000, 42 | "maxNumberOfVirtualNetworkLinksWithRegistration": 100, 43 | "numberOfRecordSets": 2, 44 | "numberOfVirtualNetworkLinks": 1, 45 | "numberOfVirtualNetworkLinksWithRegistration": 0, 46 | "provisioningState": "Succeeded" 47 | } 48 | }, 49 | { 50 | "type": "Microsoft.Network/privateDnsZones/A", 51 | "apiVersion": "2018-09-01", 52 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/*.', parameters('mlworkspaceId'), '.inference.', variables('location'))]", 53 | "dependsOn": [ 54 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 55 | ], 56 | "properties": { 57 | "ttl": 10, 58 | "aRecords": [ 59 | { 60 | "ipv4Address": "10.3.0.8" 61 | } 62 | ] 63 | } 64 | }, 65 | { 66 | "type": "Microsoft.Network/privateDnsZones/A", 67 | "apiVersion": "2018-09-01", 68 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/', parameters('mlworkspaceId'), '.workspace.', variables('location'))]", 69 | "dependsOn": [ 70 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 71 | ], 72 | "properties": { 73 | "ttl": 10, 74 | "aRecords": [ 75 | { 76 | "ipv4Address": "10.3.0.6" 77 | } 78 | ] 79 | } 80 | }, 81 | { 82 | "type": "Microsoft.Network/privateDnsZones/A", 83 | "apiVersion": "2018-09-01", 84 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/', parameters('mlworkspaceId'), '.workspace.', variables('location'), '.cert')]", 85 | "dependsOn": [ 86 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 87 | ], 88 | "properties": { 89 | "ttl": 10, 90 | "aRecords": [ 91 | { 92 | "ipv4Address": "10.3.0.6" 93 | } 94 | ] 95 | } 96 | }, 97 | { 98 | "type": "Microsoft.Network/privateDnsZones/A", 99 | "apiVersion": "2018-09-01", 100 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/ml-', variables('resourceGroup'), '-', parameters('mlworkspaceId'), '.', variables('location'))]", 101 | "dependsOn": [ 102 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 103 | ], 104 | "properties": { 105 | "ttl": 10, 106 | "aRecords": [ 107 | { 108 | "ipv4Address": "10.3.0.7" 109 | } 110 | ] 111 | } 112 | }, 113 | { 114 | "type": "Microsoft.Network/privateDnsZones/SOA", 115 | "apiVersion": "2018-09-01", 116 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/@')]", 117 | "dependsOn": [ 118 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 119 | ], 120 | "properties": { 121 | "ttl": 3600, 122 | "soaRecord": { 123 | "email": "azureprivatedns-host.microsoft.com", 124 | "expireTime": 2419200, 125 | "host": "azureprivatedns.net", 126 | "minimumTtl": 10, 127 | "refreshTime": 3600, 128 | "retryTime": 300, 129 | "serialNumber": 1 130 | } 131 | } 132 | }, 133 | { 134 | "type": "Microsoft.Network/privateDnsZones/SOA", 135 | "apiVersion": "2018-09-01", 136 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/@')]", 137 | "dependsOn": [ 138 | "[resourceId('Microsoft.Network/privateDnsZones', 
variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 139 | ], 140 | "properties": { 141 | "ttl": 3600, 142 | "soaRecord": { 143 | "email": "azureprivatedns-host.microsoft.com", 144 | "expireTime": 2419200, 145 | "host": "azureprivatedns.net", 146 | "minimumTtl": 10, 147 | "refreshTime": 3600, 148 | "retryTime": 300, 149 | "serialNumber": 1 150 | } 151 | } 152 | }, 153 | { 154 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 155 | "apiVersion": "2018-09-01", 156 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/oc36iyxncvugw')]", 157 | "location": "global", 158 | "dependsOn": [ 159 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 160 | ], 161 | "properties": { 162 | "registrationEnabled": false, 163 | "virtualNetwork": { 164 | "id": "[variables('virtualNetworks_airflow_vnet_externalid')]" 165 | } 166 | } 167 | }, 168 | { 169 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 170 | "apiVersion": "2018-09-01", 171 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/oc36iyxncvugw')]", 172 | "location": "global", 173 | "dependsOn": [ 174 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 175 | ], 176 | "properties": { 177 | "registrationEnabled": false, 178 | "virtualNetwork": { 179 | "id": "[variables('virtualNetworks_airflow_vnet_externalid')]" 180 | } 181 | } 182 | } 183 | ] 184 | } 185 | -------------------------------------------------------------------------------- /arm_templates/function_app/function_azureml_private_link_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "mlworkspaceId": { 6 | "type": "String" 7 | }, 8 | "resourceGroup": { 9 | "type": "String" 10 | } 11 | }, 12 | "variables": { 13 | "subscriptionId": "[subscription().subscriptionId]", 14 | "location": "[resourceGroup().location]", 15 | "vNetName": "function-vnet", 16 | "privateDnsZones_privatelink_api_azureml_ms_name": "privatelink.api.azureml.ms", 17 | "privateDnsZones_privatelink_notebooks_azure_net_name": "privatelink.notebooks.azure.net", 18 | "virtualNetworks_function_vnet_externalid": "[concat('/subscriptions/', variables('subscriptionId'), '/resourceGroups/', parameters('resourceGroup'), '/providers/Microsoft.Network/virtualNetworks/', variables('vNetName'))]" 19 | }, 20 | "resources": [ 21 | { 22 | "type": "Microsoft.Network/privateDnsZones", 23 | "apiVersion": "2018-09-01", 24 | "name": "[variables('privateDnsZones_privatelink_api_azureml_ms_name')]", 25 | "location": "global", 26 | "properties": { 27 | "maxNumberOfRecordSets": 25000, 28 | "maxNumberOfVirtualNetworkLinks": 1000, 29 | "maxNumberOfVirtualNetworkLinksWithRegistration": 100, 30 | "numberOfRecordSets": 4, 31 | "numberOfVirtualNetworkLinks": 1, 32 | "numberOfVirtualNetworkLinksWithRegistration": 0, 33 | "provisioningState": "Succeeded" 34 | } 35 | }, 36 | { 37 | "type": "Microsoft.Network/privateDnsZones", 38 | "apiVersion": "2018-09-01", 39 | "name": "[variables('privateDnsZones_privatelink_notebooks_azure_net_name')]", 40 | "location": "global", 41 | "properties": { 42 | "maxNumberOfRecordSets": 25000, 43 | "maxNumberOfVirtualNetworkLinks": 1000, 44 | 
"maxNumberOfVirtualNetworkLinksWithRegistration": 100, 45 | "numberOfRecordSets": 2, 46 | "numberOfVirtualNetworkLinks": 1, 47 | "numberOfVirtualNetworkLinksWithRegistration": 0, 48 | "provisioningState": "Succeeded" 49 | } 50 | }, 51 | { 52 | "type": "Microsoft.Network/privateDnsZones/A", 53 | "apiVersion": "2018-09-01", 54 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/*.', parameters('mlworkspaceId'), '.inference.', variables('location'))]", 55 | "dependsOn": [ 56 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 57 | ], 58 | "properties": { 59 | "ttl": 10, 60 | "aRecords": [ 61 | { 62 | "ipv4Address": "10.5.0.6" 63 | } 64 | ] 65 | } 66 | }, 67 | { 68 | "type": "Microsoft.Network/privateDnsZones/A", 69 | "apiVersion": "2018-09-01", 70 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/', parameters('mlworkspaceId'), '.workspace.', variables('location'))]", 71 | "dependsOn": [ 72 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 73 | ], 74 | "properties": { 75 | "ttl": 10, 76 | "aRecords": [ 77 | { 78 | "ipv4Address": "10.5.0.4" 79 | } 80 | ] 81 | } 82 | }, 83 | { 84 | "type": "Microsoft.Network/privateDnsZones/A", 85 | "apiVersion": "2018-09-01", 86 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/', parameters('mlworkspaceId'), '.workspace.', variables('location'), '.cert')]", 87 | "dependsOn": [ 88 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 89 | ], 90 | "properties": { 91 | "ttl": 10, 92 | "aRecords": [ 93 | { 94 | "ipv4Address": "10.5.0.4" 95 | } 96 | ] 97 | } 98 | }, 99 | { 100 | "type": "Microsoft.Network/privateDnsZones/A", 101 | "apiVersion": "2018-09-01", 102 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/ml-', parameters('resourceGroup'), '-', parameters('mlworkspaceId'), '.', variables('location'))]", 103 | "dependsOn": [ 104 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 105 | ], 106 | "properties": { 107 | "ttl": 10, 108 | "aRecords": [ 109 | { 110 | "ipv4Address": "10.5.0.5" 111 | } 112 | ] 113 | } 114 | }, 115 | { 116 | "type": "Microsoft.Network/privateDnsZones/SOA", 117 | "apiVersion": "2018-09-01", 118 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/@')]", 119 | "dependsOn": [ 120 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 121 | ], 122 | "properties": { 123 | "ttl": 3600, 124 | "soaRecord": { 125 | "email": "azureprivatedns-host.microsoft.com", 126 | "expireTime": 2419200, 127 | "host": "azureprivatedns.net", 128 | "minimumTtl": 10, 129 | "refreshTime": 3600, 130 | "retryTime": 300, 131 | "serialNumber": 1 132 | } 133 | } 134 | }, 135 | { 136 | "type": "Microsoft.Network/privateDnsZones/SOA", 137 | "apiVersion": "2018-09-01", 138 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/@')]", 139 | "dependsOn": [ 140 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 141 | ], 142 | "properties": { 143 | "ttl": 3600, 144 | "soaRecord": { 145 | "email": "azureprivatedns-host.microsoft.com", 146 | "expireTime": 2419200, 147 | "host": 
"azureprivatedns.net", 148 | "minimumTtl": 10, 149 | "refreshTime": 3600, 150 | "retryTime": 300, 151 | "serialNumber": 1 152 | } 153 | } 154 | }, 155 | { 156 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 157 | "apiVersion": "2018-09-01", 158 | "name": "[concat(variables('privateDnsZones_privatelink_api_azureml_ms_name'), '/oc36iyxncvugx')]", 159 | "location": "global", 160 | "dependsOn": [ 161 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_api_azureml_ms_name'))]" 162 | ], 163 | "properties": { 164 | "registrationEnabled": false, 165 | "virtualNetwork": { 166 | "id": "[variables('virtualNetworks_function_vnet_externalid')]" 167 | } 168 | } 169 | }, 170 | { 171 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 172 | "apiVersion": "2018-09-01", 173 | "name": "[concat(variables('privateDnsZones_privatelink_notebooks_azure_net_name'), '/oc36iyxncvugx')]", 174 | "location": "global", 175 | "dependsOn": [ 176 | "[resourceId('Microsoft.Network/privateDnsZones', variables('privateDnsZones_privatelink_notebooks_azure_net_name'))]" 177 | ], 178 | "properties": { 179 | "registrationEnabled": false, 180 | "virtualNetwork": { 181 | "id": "[variables('virtualNetworks_function_vnet_externalid')]" 182 | } 183 | } 184 | } 185 | ] 186 | } 187 | -------------------------------------------------------------------------------- /arm_templates/function_app/function_app_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "name": { 6 | "type": "String" 7 | } 8 | }, 9 | "variables": { 10 | "resourceGroup": "[resourceGroup().name]", 11 | "location": "[resourceGroup().location]", 12 | "subscriptionId": "[subscription().subscriptionId]", 13 | "vNetName": "function-vnet", 14 | "subnetName": "functionapp", 15 | "outboundSubnetDeployment": "outboundSubnetDeployment", 16 | "storageAccountName": "[parameters('name')]", 17 | "hostingPlanName": "[concat('ASP-', parameters('name'), '-be98')]", 18 | "use32BitWorkerProcess": false, 19 | "ftpsState": "FtpsOnly", 20 | "linuxFxVersion": "Python|3.10", 21 | "sku": "ElasticPremium", 22 | "skuCode": "EP1", 23 | "workerSize": "3", 24 | "workerSizeId": "3", 25 | "numberOfWorkers": "1", 26 | "alwaysOn": false 27 | }, 28 | "resources": [ 29 | { 30 | "type": "Microsoft.Web/sites", 31 | "apiVersion": "2022-03-01", 32 | "name": "[parameters('name')]", 33 | "location": "[variables('location')]", 34 | "dependsOn": [ 35 | "[concat('Microsoft.Web/serverfarms/', variables('hostingPlanName'))]", 36 | "[concat('Microsoft.Storage/storageAccounts/', variables('storageAccountName'))]", 37 | "[variables('outboundSubnetDeployment')]" 38 | ], 39 | "tags": {}, 40 | "kind": "functionapp,linux", 41 | "identity": { 42 | "type": "SystemAssigned" 43 | }, 44 | "properties": { 45 | "name": "[parameters('name')]", 46 | "siteConfig": { 47 | "appSettings": [ 48 | { 49 | "name": "FUNCTIONS_EXTENSION_VERSION", 50 | "value": "~4" 51 | }, 52 | { 53 | "name": "FUNCTIONS_WORKER_RUNTIME", 54 | "value": "python" 55 | }, 56 | { 57 | "name": "AzureWebJobsStorage", 58 | "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2019-06-01').keys[0].value,';EndpointSuffix=','core.windows.net')]" 59 | }, 
60 | { 61 | "name": "WEBSITE_CONTENTAZUREFILECONNECTIONSTRING", 62 | "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2019-06-01').keys[0].value,';EndpointSuffix=','core.windows.net')]" 63 | }, 64 | { 65 | "name": "WEBSITE_CONTENTSHARE", 66 | "value": "[concat(parameters('name'), 'ba17')]" 67 | }, 68 | { 69 | "name": "subscriptionId", 70 | "value": "[variables('subscriptionId')]" 71 | }, 72 | { 73 | "name": "resourceGroup", 74 | "value": "[variables('resourceGroup')]" 75 | } 76 | ], 77 | "cors": { 78 | "allowedOrigins": [ 79 | "https://portal.azure.com" 80 | ] 81 | }, 82 | "use32BitWorkerProcess": "[variables('use32BitWorkerProcess')]", 83 | "ftpsState": "[variables('ftpsState')]", 84 | "linuxFxVersion": "[variables('linuxFxVersion')]" 85 | }, 86 | "clientAffinityEnabled": false, 87 | "virtualNetworkSubnetId": "[resourceId(variables('resourceGroup'), 'Microsoft.Network/virtualNetworks/subnets', variables('vNetName'), variables('subnetName'))]", 88 | "publicNetworkAccess": "Enabled", 89 | "vnetRouteAllEnabled": true, 90 | "httpsOnly": true, 91 | "serverFarmId": "[concat('/subscriptions/', variables('subscriptionId'),'/resourcegroups/', variables('resourceGroup'), '/providers/Microsoft.Web/serverfarms/', variables('hostingPlanName'))]" 92 | }, 93 | "resources": [ 94 | { 95 | "type": "Microsoft.Web/sites/basicPublishingCredentialsPolicies", 96 | "apiVersion": "2022-09-01", 97 | "name": "[concat(parameters('name'), '/scm')]", 98 | "dependsOn": [ 99 | "[resourceId('Microsoft.Web/Sites', parameters('name'))]" 100 | ], 101 | "properties": { 102 | "allow": false 103 | } 104 | }, 105 | { 106 | "type": "Microsoft.Web/sites/basicPublishingCredentialsPolicies", 107 | "apiVersion": "2022-09-01", 108 | "name": "[concat(parameters('name'), '/ftp')]", 109 | "dependsOn": [ 110 | "[resourceId('Microsoft.Web/Sites', parameters('name'))]" 111 | ], 112 | "properties": { 113 | "allow": false 114 | } 115 | } 116 | ] 117 | }, 118 | { 119 | "type": "Microsoft.Web/serverfarms", 120 | "apiVersion": "2018-11-01", 121 | "name": "[variables('hostingPlanName')]", 122 | "location": "[variables('location')]", 123 | "dependsOn": [], 124 | "tags": {}, 125 | "sku": { 126 | "Tier": "[variables('sku')]", 127 | "Name": "[variables('skuCode')]" 128 | }, 129 | "kind": "linux", 130 | "properties": { 131 | "name": "[variables('hostingPlanName')]", 132 | "workerSize": "[variables('workerSize')]", 133 | "workerSizeId": "[variables('workerSizeId')]", 134 | "numberOfWorkers": "[variables('numberOfWorkers')]", 135 | "reserved": true, 136 | "maximumElasticWorkerCount": "20", 137 | "zoneRedundant": false 138 | } 139 | }, 140 | { 141 | "type": "Microsoft.Resources/deployments", 142 | "apiVersion": "2020-07-01", 143 | "name": "[variables('outboundSubnetDeployment')]", 144 | "dependsOn": [], 145 | "properties": { 146 | "mode": "Incremental", 147 | "template": { 148 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 149 | "contentVersion": "1.0.0.0", 150 | "parameters": {}, 151 | "variables": {}, 152 | "resources": [ 153 | { 154 | "type": "Microsoft.Network/virtualNetworks/subnets", 155 | "apiVersion": "2020-07-01", 156 | "name": "[concat(variables('vNetName'), '/', variables('subnetName'))]", 157 | "properties": { 158 | "delegations": [ 159 | { 160 | "name": "delegation", 161 | "properties": { 162 | "serviceName": "Microsoft.Web/serverfarms" 163 
| } 164 | } 165 | ], 166 | "serviceEndpoints": [ 167 | { 168 | "service": "Microsoft.Storage" 169 | } 170 | ], 171 | "addressPrefix": "10.5.1.0/24" 172 | } 173 | } 174 | ] 175 | } 176 | }, 177 | "subscriptionId": "[variables('subscriptionId')]", 178 | "resourceGroup": "[variables('resourceGroup')]" 179 | }, 180 | { 181 | "type": "Microsoft.Storage/storageAccounts", 182 | "apiVersion": "2022-05-01", 183 | "name": "[variables('storageAccountName')]", 184 | "location": "[variables('location')]", 185 | "tags": {}, 186 | "sku": { 187 | "name": "Standard_LRS" 188 | }, 189 | "kind": "StorageV2", 190 | "properties": { 191 | "supportsHttpsTrafficOnly": true, 192 | "minimumTlsVersion": "TLS1_2", 193 | "defaultToOAuthAuthentication": true 194 | } 195 | } 196 | ] 197 | } 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://dev.azure.com/jjuzaszek/LSTM_Attention_redeployment_for_yahoo_stock_data/_apis/build/status%2FJuliuszB12.LSTM_Attention_redeployment_for_yahoo_stock_data?branchName=main)](https://dev.azure.com/jjuzaszek/LSTM_Attention_redeployment_for_yahoo_stock_data/_build/latest?definitionId=20&branchName=main) 2 | 3 | ## High-level architecture overview 4 | ![architecture](https://github.com/JuliuszB12/LSTM_Attention_redeployment_for_yahoo_stock_data/assets/68758875/e71dc045-df45-497d-8fc4-bea09abbfcb2) 5 | 6 | ## Abstract 7 | Airbyte with a Yahoo source connector sends data about 1-minute prices and volumes of chosen stocks to a Kafka topic at 1-minute intervals. 8 | At the same time, an Airflow DAG consumes that data and stores it in Azure Blob Storage. Every 15 minutes a different Airflow DAG fetches the updated blob storage data to retrain an LSTM Attention machine learning model that predicts the next 1-minute stock close price from the previous time series of prices and calculated technical indicators, and logs it (together with related utils such as fitted data scalers) as the next run of an experiment in the MLflow model registry. The new version of the model is then compared on fresh validation data with the model that holds the production alias in MLflow, to either retain the previous version as the production version or swap the alias and deploy the new version to the Azure ML Studio real-time inference endpoint hosted on an Azure Kubernetes cluster; a simplified sketch of this comparison step is shown below. The described operations are executed within peered private networks, and all privileges to access the different resources result from the system-assigned managed identities of the resources from which the code is executed, without explicitly referencing any connection secrets. Predictions of the hosted model can be fetched from the endpoint through an Azure Function behind Azure API Management. Checking the quality of the Python code and the complete deployment of both infrastructure and containers are fully managed by Azure DevOps CI/CD pipelines defined in the azure-pipelines.yml file. 9 | 10 | Development tech stack: TensorFlow/Keras, Kafka Python Client, Azure Python Client, Azure Resource Manager templates, Custom Script Extension, Docker Compose, azure-pipelines.yml, Bash, PowerShell, Azure CLI 11 |
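To make the redeployment decision concrete, here is a simplified, illustrative sketch of the comparison step. The actual logic lives in dags/custom_operators/mlflow_utils.py (compare_and_update_production_stage) and re-evaluates the production model on the new validation data; the sketch below instead compares the MSE metric logged for each run, and the function name and structure are assumptions rather than the repository's code. Only the return values ('new_model', 'new_version', 'old_version') follow the contract consumed by deploy_azureml_task.

```python
from mlflow import MlflowClient
from mlflow.exceptions import MlflowException


def compare_with_production(model_name: str, new_mse: float, new_version: str) -> str:
    """Illustrative sketch (not the repo's implementation): keep or reassign
    the 'production' alias based on the logged MSE of each model version."""
    client = MlflowClient(tracking_uri="http://10.4.0.4:5000")
    try:
        prod = client.get_model_version_by_alias(model_name, "production")
    except MlflowException:
        # No production model exists yet: promote the first trained version.
        client.set_registered_model_alias(model_name, "production", new_version)
        return "new_model"
    # Compare against the MSE logged for the current production run.
    prod_mse = client.get_run(prod.run_id).data.metrics["Mean Squared Error"]
    if new_mse < prod_mse:
        client.set_registered_model_alias(model_name, "production", new_version)
        return "new_version"
    return "old_version"
```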
20 | 21 |  NOTE: 22 |  Minimum compute quotas required on the chosen Azure subscription 23 |  to successfully deploy the project with default settings: 24 |   Poland Central Standard Dv2 Family vCPUs 12 25 |   Poland Central Standard Ev4 Family vCPUs 8 26 |   Poland Central Total Regional vCPUs 20 27 |  Estimated cost for default project settings: $2 per hour 28 | 29 | ## Content overview 30 | Overview of the files in the repository 31 | 32 | **azure-pipelines.yaml** - set of CI/CD instructions for Azure DevOps Pipelines to deploy the entire project from scratch to an operational state 33 | 34 | **airbyte** 35 | - setup.sh - script to deploy Airbyte docker containers and invoke Bash script to establish yahoo -> kafka connection pipeline 36 | - connection_setup.sh - script to establish yahoo -> kafka connection pipeline in Airbyte 37 | 38 | **airflow** 39 | - dags/kafka_dag.py - Airflow DAG: extract data from kafka, transform it and upload to Azure Blob Storage 40 | - dags/model_dag.py - Airflow DAG: extract data from Azure Blob Storage, train new version of machine learning model, fetch production version from MLflow, compare versions, if new version is better swap versions in MLflow and deploy new version to Azure Machine Learning Studio real-time inference endpoint hosted on Azure Kubernetes cluster 41 | - dags/custom_operators/custom_functions_kafka.py - functions for PythonOperator tasks in kafka_dag.py 42 | - dags/custom_operators/custom_functions_model.py - functions for PythonOperator tasks in model_dag.py 43 | - Custom dependencies for custom_functions_kafka.py and custom_functions_model.py: 44 |  dags/custom_operators/azure_utils 45 |  dags/custom_operators/model_utils 46 |  dags/custom_operators/mlflow_utils 47 | - score.py - file executed inside Azure ML Studio real-time inference endpoint during its creation and usage, 48 | the use of global variables in this file is imposed by Azure documentation 49 | - setup.sh - script to deploy Airflow docker containers 50 | 51 | **kafka** 52 | - setup.sh - script to deploy Kafka docker containers 53 | 54 | **mlflow** 55 | - setup.sh - script to deploy MLflow docker containers 56 | 57 | **function_api** 58 | - function_app/function/\_\_init\_\_.py - function to fetch model result from Azure ML Studio endpoint 59 | - function_app/function/function.json - configuration file for that function 60 | - function_app/host.json - configuration file for Azure Function App 61 | - apimanagement.sh - Azure CLI bash commands to deploy API Management service and API for Function App
62 | - example_api_call/example_api_call.py - example POST request with test data to the deployed API after successful deployment 63 | 64 | **scripts** 65 | - roles_assignment.ps1 - Role assignments between Azure services 66 | 67 | **arm_templates** 68 |  All Azure Resource Manager templates serving as Infrastructure as Code used by Azure DevOps Pipelines for Continuous Deployment 69 | - airbyte_infra_template.json - Azure Virtual Machine as Airbyte host with dependencies (Azure Virtual Network, Azure Disk Storage, Azure Network Interface and Security Group) and Custom Script Extension that invokes Bash script airbyte/setup.sh on Linux 70 | - airflow_infra_template.json - Azure Virtual Machine as Airflow host with dependencies (Azure Virtual Network, Azure Disk Storage, Azure Network Interface and Security Group) and Custom Script Extension that invokes Bash script airflow/setup.sh on Linux 71 | - kafka_infra_template.json - Azure Virtual Machine as Kafka host with dependencies (Azure Virtual Network, Azure Disk Storage, Azure Network Interface and Security Group) and Custom Script Extension that invokes Bash script kafka/setup.sh on Linux 72 | - mlflow_infra_template.json - Azure Virtual Machine as MLflow host with dependencies (Azure Virtual Network, Azure Disk Storage, Azure Network Interface and Security Group) and Custom Script Extension that invokes Bash script mlflow/setup.sh on Linux 73 | - airflow_mlstudio_private_endpoint_template.json - Private endpoint from Airflow host Virtual Network to Azure Machine Learning Studio 74 | - airflow_storage_private_endpoint_template.json - Private endpoint from Airflow host Virtual Network to Azure Blob Storage 75 | - azureml_private_link_template.json - Private DNS Zone for Azure Machine Learning Studio with necessary DNS records 76 | - blob_storage_private_link_template.json - Private DNS Zone for Azure Blob Storage with necessary DNS records 77 | - blob_storage_template.json - Azure Blob Storage 78 | - temp_blob_storage_template.json - Azure Blob Storage for deployment utilities 79 | - azureml_template.json - Azure Machine Learning Studio 80 | - function_app_template.json - Azure Function App 81 | - functionapp_vnet_template.json - Outbound Virtual Network for Azure Function App 82 | - function_azureml_private_endpoint_template.json - Private endpoint from Azure Function App outbound Virtual Network to Azure Machine Learning Studio 83 | - function_azureml_private_link_template.json - Private DNS Zone for Azure Machine Learning Studio with necessary DNS records for Azure Function App 84 | - vnet_peering_template.json - Virtual Network Peering 85 | 86 | ## Infrastructure overview 87 | **Azure Virtual Machine**: Provides scalable, on-demand computing resources for running applications and workloads in the cloud. 88 | **Azure Virtual Network**: Enables secure, isolated, and logically segmented network environments within the Azure cloud. 89 | **Azure VNet Peering**: Allows seamless, low-latency connectivity between two Azure Virtual Networks. 90 | **Azure Blob Storage Account**: Offers scalable object storage for unstructured data such as documents, media files, and backups. 91 | **Azure Machine Learning Studio Workspace**: Provides a collaborative environment for data scientists to build, train, and deploy machine learning models. 92 | **Azure Function App**: Enables serverless computing by allowing the execution of event-driven functions without managing infrastructure. 93 | **Azure API Management**: Facilitates the creation, management, and security of APIs at scale.
94 | **Azure Private Endpoint**: Provides a secure connection to Azure services through a private link, ensuring data remains on the private network. 95 | **System-assigned Managed Identity**: Automatically manages credentials for accessing Azure services, providing secure identity and access management for applications. 96 | **Airbyte**: An open-source data integration platform for extracting, transforming, and loading data from various sources. 97 | **Kafka**: A distributed streaming platform used for building real-time data pipelines and streaming applications, enabling low-latency data transmission and processing. 98 | **Airflow**: An open-source workflow management platform for scheduling and monitoring complex data pipelines. 99 | **MLflow**: An open-source platform for managing the machine learning lifecycle, including experimentation, reproducibility, and deployment. 100 | **Azure Machine Learning real-time inference endpoint on Azure Kubernetes cluster**: Deploys machine learning models to provide real-time predictions via scalable Kubernetes clusters in Azure. 101 | 102 | ## Features overview 103 | Overview of the project functionalities 104 | 105 | **Continuous Integration and Deployment via Azure DevOps Pipelines** 106 |  The set of CI/CD instructions is defined within azure-pipelines.yaml. Every time a new commit or merge is made to the desired repo branch, Azure Pipelines looks for that file in the repository and executes it in the context of the Azure subscription and a newly created Azure Resource Group, which are set as parameters. The Azure Pipelines App integrated with the GitHub repository reports the state of the execution in the details of the assigned commit. CI/CD instructions are executed sequentially due to free-tier Azure Pipelines limitations. Order of high-level CI/CD instructions with the corresponding tool used: 107 | - Assign Azure Subscription ID and new resource group name 108 | - Check the quality of Python code with flake8, isort, mypy, black and pylint (Bash) 109 | - Pack Azure Function App project and other services utils for deployment (Bash) 110 | - Dynamically resolve worldwide unique names for services like Azure Blob Storage, Function App or API Management (PowerShell) 111 | - Deploy temporary Azure Blob Storage for deployment utils (ARM template) 112 | - Send airbyte, kafka, airflow and mlflow packed content folders to temporary storage (Bash) 113 | - Deploy Azure ML Studio Workspace and dependencies (ARM template) 114 | - Deploy Azure Blob Storage for keeping stock prices data (ARM template) 115 | - Deploy Kafka with its infrastructure (ARM template + Custom Script Extension) 116 | - Deploy Airbyte with its infrastructure (ARM template + Custom Script Extension) and establish yahoo -> kafka connection pipeline (Bash) 117 | - Deploy MLflow with its infrastructure (ARM template + Custom Script Extension) 118 | - Deploy Airflow with its infrastructure (ARM template + Custom Script Extension) and start DAGs 119 | - Deploy VNet Peering between Virtual Networks for Airbyte and Kafka infra (ARM template) 120 | - Deploy VNet Peering between Virtual Networks for Kafka and Airflow infra (ARM template) 121 | - Deploy VNet Peering between Virtual Networks for Airflow and MLflow infra (ARM template) 122 | - Deploy Private Endpoint to Azure Blob Storage from Virtual Network for Airflow infra (ARM template) 123 | - Deploy Private Endpoint to Azure ML Studio Workspace from Virtual Network for Airflow infra (ARM template) 124 | - Deploy private DNS zone required to connect Airflow infra to Azure Blob Storage (ARM template) 125 | - Deploy private DNS zone required to connect Airflow infra to Azure ML Studio Workspace (ARM template) 126 | - Deploy Virtual Network for Azure Function App (ARM template) 127 | - Deploy Azure Function App (ARM template) 128 | - Deploy function and all required packages to Azure Function App (Built-in Azure DevOps Pipeline task) 129 | - Deploy Private Endpoint from Azure Function App to Azure ML Studio Workspace (ARM template) 130 | - Deploy private DNS zone to connect Azure Function App to Azure ML Studio Workspace (ARM template) 131 | - Perform all required role assignments between services (Azure PowerShell) 132 | - Deploy API Management service and API for Azure Function App (Azure CLI) 133 | - Assign inbound policy to API for Azure Function App (Azure PowerShell) 134 | 135 | **Airbyte data extraction and loading to Kafka** 136 |  Airbyte is a tool for data integration between various systems using existing Airbyte built-in connectors. 137 | The idea is to choose a pre-defined source and destination with the proper dedicated configuration and connect them together within an Airbyte connection. 138 | The chosen source for this project is the Yahoo finance service with real-time stock data. With Kafka set as the destination and with proper scheduler configuration, Airbyte can serve a similar role to Kafka Connect, but with less development effort. The configuration for the source, destination and connection is specified within airbyte/connection_setup.sh and includes all the necessary dedicated configuration for Yahoo data as well as for the Kafka producer and topic. The list of desired stock tickers can be set in the "tickers" parameter for the Bash "source_id" variable. Although the Airbyte call to the Yahoo API cannot be made with a granularity of the "interval" parameter smaller than 1 day, this can be overcome because the raw data from the API response contains continuously updated timestamps and stock data with 1-minute granularity; it just requires more involved post-processing to extract it (done as part of the Airflow DAG consuming that data from the Kafka topic), as sketched below. Unprocessed data is sent to the Kafka topic via the private Azure network. The environment variable KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true in the Bitnami Kafka docker-compose.yaml allows Airbyte to auto-create all required Kafka topics.
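A hypothetical sketch of that post-processing idea: a Yahoo chart-style payload requested with a daily "interval" still carries per-minute timestamps and quotes that can be flattened into 1-minute rows. The field names below are assumptions about the payload shape, not the exact Airbyte record schema:

```python
# Flatten a chart-style response into one row per 1-minute timestamp.
from datetime import datetime, timezone

import pandas as pd

def minute_bars(payload: dict) -> pd.DataFrame:
    result = payload["chart"]["result"][0]          # assumed response layout
    quotes = result["indicators"]["quote"][0]       # open/high/low/close/volume lists
    rows = [
        {
            "time": datetime.fromtimestamp(ts, tz=timezone.utc),
            "open": quotes["open"][i],
            "high": quotes["high"][i],
            "low": quotes["low"][i],
            "close": quotes["close"][i],
            "volume": quotes["volume"][i],
        }
        for i, ts in enumerate(result["timestamp"])
    ]
    return pd.DataFrame(rows)
```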
139 | 140 | **Airflow ETL from Kafka to Azure Blob Storage** 141 |  Airflow DAG "kafka_and_azure_blob_dag" specified within airflow/dags/kafka_dag.py extracts raw Airbyte data from Kafka, transforms it and loads it to an Azure Blob Storage container. The reason for using Kafka instead of ingesting Airbyte data directly within the Airflow DAG is the need for an asynchronous architecture design due to unpredictable Airbyte and Yahoo behaviour that can result in failures and retries, and to avoid blocking an Airflow task while waiting for the latest available data. The Airflow PythonOperator task "consume_kafka_task" performs data processing in order to obtain the desired state of the last 1-minute stock data. The final result has the structure of a Pandas DataFrame with an explicitly specified schema that can be read directly in airflow/dags/custom_operators/custom_functions_kafka.py inside the function "consume_kafka_task". The DataFrame saved to .pkl format is then uploaded by the next Airflow task to Azure Blob Storage as an object with the naming convention {ticker}/yyyy/mm/dd/HH/MM/file_name.pkl to allow efficient querying by ticker and date, and to display data in the Azure Portal with a folder-oriented structure. Airflow can reach Azure Blob Storage via the private Azure network using a Private Endpoint and a System-Assigned Managed Identity that is assigned to the Airflow host and provides an RBAC role to write data to the Azure Blob Storage container. A minimal sketch of this consume-and-upload flow is shown below.
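A minimal sketch of the flow under stated assumptions: the topic name, Kafka address and storage account name are hypothetical, and the real logic lives in custom_functions_kafka.py. The container name "kafkadata" matches the pipeline variable; DefaultAzureCredential picks up the VM's system-assigned managed identity, so no secret appears in code:

```python
# Hypothetical sketch: consume records from Kafka, build a DataFrame, pickle it
# in memory and upload it to Blob Storage using the managed identity.
import io
import json
from datetime import datetime, timezone

import pandas as pd
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "stock-data",                                   # assumed topic name
    bootstrap_servers="10.1.0.4:9092",              # assumed private Kafka address
    auto_offset_reset="earliest",
    consumer_timeout_ms=10_000,                     # stop polling when caught up
    value_deserializer=lambda v: json.loads(v.decode("utf-8")),
)
df = pd.DataFrame([msg.value for msg in consumer])

blob_service = BlobServiceClient(
    account_url="https://<storageAccountName>.blob.core.windows.net",
    credential=DefaultAzureCredential(),            # system-assigned managed identity
)
now = datetime.now(timezone.utc)
blob_name = f"AAPL/{now:%Y/%m/%d/%H/%M}/data.pkl"   # {ticker}/yyyy/mm/dd/HH/MM/file.pkl
buffer = io.BytesIO()
df.to_pickle(buffer)
buffer.seek(0)
blob_service.get_blob_client("kafkadata", blob_name).upload_blob(buffer, overwrite=True)
```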
142 | 143 | **Airflow LSTM Attention model training** 144 |  Airflow DAG "train_and_deploy_model_dag" specified within airflow/dags/model_dag.py extracts data from Azure Blob Storage, prepares it for machine learning model training, trains the model, compares it to the current production version of the model on new validation data and possibly decides to deploy it to the Azure Machine Learning real-time inference endpoint hosted on an Azure Kubernetes cluster. The LSTM model is implemented with the TensorFlow/Keras framework and with an explicit implementation of the Attention layer. Each training run performed by the Airflow task "train_model_task" is registered as an MLflow experiment run to log all training statistics, model artifacts and utilities like fitted data scalers, and to maintain the model version registry. MLflow has a system of aliases that allows marking specific model versions to differentiate them by their current role. The best model version that is currently deployed to production carries the appropriate alias and is used to assess new model versions. A condensed sketch of this training-and-logging flow is shown below.
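A condensed, hypothetical sketch of the idea: layer sizes, the tracking address, and the experiment and model names are illustrative, and the actual implementation lives in model_utils and mlflow_utils. It uses the stock Keras Attention layer rather than the project's explicit implementation:

```python
# Hypothetical sketch: LSTM with an attention layer, logged to the MLflow registry;
# the "production" alias marks the version currently serving traffic.
import mlflow
import numpy as np
import tensorflow as tf
from mlflow import MlflowClient

mlflow.set_tracking_uri("http://10.3.0.4:5000")   # assumed private MLflow address
mlflow.set_experiment("lstm_attention")           # assumed experiment name

def build_model(window: int = 60, n_features: int = 8) -> tf.keras.Model:
    inputs = tf.keras.Input(shape=(window, n_features))
    hidden = tf.keras.layers.LSTM(64, return_sequences=True)(inputs)
    context = tf.keras.layers.Attention()([hidden, hidden])   # dot-product attention
    pooled = tf.keras.layers.GlobalAveragePooling1D()(context)
    outputs = tf.keras.layers.Dense(1)(pooled)    # next 1-minute close price
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer="adam", loss="mse")
    return model

X, y = np.random.rand(256, 60, 8), np.random.rand(256, 1)    # stand-in training data
with mlflow.start_run():
    model = build_model()
    model.fit(X, y, epochs=2, batch_size=32, verbose=0)
    mlflow.tensorflow.log_model(model, "model", registered_model_name="lstm_attention")

# If the new version beats production on validation data, move the alias to it:
client = MlflowClient()
new_version = max(
    int(v.version) for v in client.search_model_versions("name='lstm_attention'")
)
client.set_registered_model_alias("lstm_attention", "production", new_version)
```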
145 | 146 | **Airflow LSTM Attention model deployment** 147 |  If the new model version beats the production model, it takes over the production alias and is deployed to the Azure inference endpoint by the Airflow task "deploy_azureml_task" of the Airflow DAG "train_and_deploy_model_dag". The behaviour of the model in the Azure inference endpoint is determined by airflow/score.py, which is prepared according to Azure documentation guidelines. Two functions from score.py, "init" and "run", are executed when the model is deployed and invoked, respectively. Azure documentation imposes the usage of global variables in this file to share them between the deployment and invocation functions. The model artifact itself can be one of these variables, so all utilities concerning it are initialized during deployment and then used during invocation. Azure allows creating a Python conda environment for score.py execution, which makes it possible to handle the model artifact that came from MLflow using the dedicated MLflow framework inside the Azure inference endpoint. Airflow can reach Azure Machine Learning Studio via the private Azure network using a Private Endpoint and a System-Assigned Managed Identity that provides RBAC roles to upload the model to the Azure registry as well as to create the endpoint and deploy the model to it. A skeleton of the init/run pattern is sketched below.
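A minimal skeleton of that init/run pattern; the artifact subpath and the request payload shape are assumptions, and the authoritative scoring logic is in airflow/score.py:

```python
# Hypothetical score.py skeleton following the AzureML scoring-script convention:
# init() runs once when the deployment starts, run() on every request. Module-level
# globals are the documented way to share state between the two.
import json
import os

import mlflow.pyfunc
import numpy as np

model = None  # shared between init() and run()

def init():
    global model
    # AZUREML_MODEL_DIR points at the registered model files inside the container
    model_path = os.path.join(os.environ["AZUREML_MODEL_DIR"], "model")
    model = mlflow.pyfunc.load_model(model_path)

def run(raw_data: str):
    data = np.array(json.loads(raw_data)["data"])   # assumed request payload shape
    prediction = model.predict(data)
    return json.dumps({"prediction": np.asarray(prediction).tolist()})
```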
148 | 149 | **API to request model result** 150 |  To ensure proper endpoint usage tracking and management, the endpoint is not directly accessible to public users and can be reached only through a dedicated API. This API is built with a Python function deployed via Azure Function App as the backend and with Azure API Management as the frontend. The function handles the user request, connects to Azure Machine Learning Studio, requests the model result from the real-time inference endpoint and returns a response status to the user, together with model result data if the response status is OK. It is implemented within Azure Function App because of the small scope of the function's task and its frequent but irregular use, which makes serverless computing infrastructure the right choice for the host. The function within Azure Function App can reach Azure Machine Learning Studio via the private Azure network using a Private Endpoint and a System-Assigned Managed Identity that provides an RBAC role to access the inference endpoint. Because a Private Endpoint to Azure Machine Learning Studio requires an Azure Virtual Network from which it is established, and an Azure Function App by default operates without one, the Function App has to be connected to an Azure Virtual Network that serves as the source network for its outbound connections. The Managed Identity, together with the Azure subscription ID and Azure Resource Group name delivered as function environment variables from Azure DevOps Pipelines, allows seamless, implicit, secure connectivity to Azure Machine Learning Studio and reusability of the project code across different releases and Azure accounts. A similar approach is used in other parts of the project. The function within Azure Function App is reachable through an Azure API Management endpoint. An inbound policy is applied to requests made to the Azure API to forward them appropriately to the Azure Function App, and Azure API Management Named values are used for secrets management. 151 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - main 3 | 4 | variables: 5 | subscriptionId: '38ca6696-5c82-4571-b2af-bf3f256cf663' 6 | resourceGroupName: 'condeploymenttest09' # note that it will be parsed to max 19 lowercase alphanumeric chars anyway 7 | location: 'Poland Central' # changing this should be considered carefully (check supported functionalities and assigned compute quotas) 8 | azconnection: 'Azure subscription 1 ($(subscriptionId))' 9 | 10 | stages: 11 | - stage: Continuous_Integration 12 | jobs: 13 | - job: Continuous_Integration 14 | pool: 15 | vmImage: ubuntu-latest 16 | steps: 17 | - task: UsePythonVersion@0 18 | inputs: 19 | versionSpec: '3.10' 20 | addToPath: true 21 | 22 | - script: | 23 | python -m pip install --upgrade pip 24 | pip install flake8 isort mypy black pylint 25 | displayName: 'Install Python code quality tools' 26 | 27 | - script: | 28 | flake8 . || echo "Code style issues" 29 | displayName: 'Check code style with flake8' 30 | 31 | - script: | 32 | isort --check-only . --diff || echo "Import order issues" 33 | displayName: 'Check import order with isort' 34 | 35 | - script: | 36 | mypy . --ignore-missing-imports || echo "Type checking issues" 37 | displayName: 'Perform type check with mypy' 38 | 39 | - script: | 40 | black --check . || echo "Code format issues" 41 | displayName: 'Check code format with black' 42 | 43 | - script: | 44 | pylint . || echo "Code quality issues" 45 | displayName: 'Check code quality with pylint' 46 | 47 | - script: | 48 | python -m pip install --upgrade pip 49 | pip install -r $(Build.SourcesDirectory)/function_api/function_app/requirements.txt --target="$(Build.SourcesDirectory)/function_api/function_app/.python_packages/lib/site-packages" 50 | displayName: 'Install dependencies for Function App' 51 | 52 | - script: | 53 | cd $(Build.SourcesDirectory)/function_api/function_app 54 | zip -r $(Build.ArtifactStagingDirectory)/functionapp.zip . 55 | displayName: 'Pack function for Function App' 56 | 57 | - script: | 58 | mkdir -p $(Build.ArtifactStagingDirectory)/services 59 | tar -czf $(Build.ArtifactStagingDirectory)/services/airbyte.tar.gz airbyte 60 | tar -czf $(Build.ArtifactStagingDirectory)/services/kafka.tar.gz kafka 61 | tar -czf $(Build.ArtifactStagingDirectory)/services/airflow.tar.gz airflow 62 | tar -czf $(Build.ArtifactStagingDirectory)/services/mlflow.tar.gz mlflow 63 | displayName: 'Pack folders' 64 | 65 | - publish: $(Build.ArtifactStagingDirectory)/functionapp.zip 66 | artifact: FunctionAppArtifact 67 | displayName: 'Publish Function App Artifact' 68 | 69 | - publish: $(Build.ArtifactStagingDirectory)/services 70 | artifact: CompressedFolders 71 | displayName: 'Publish packed folders Artifact' 72 | 73 | - stage: Continuous_Deployment 74 | jobs: 75 | - job: Continuous_Deployment 76 | pool: 77 | vmImage: ubuntu-latest 78 | steps: 79 | - download: current 80 | artifact: FunctionAppArtifact 81 | displayName: 'Download Function App Artifact' 82 | 83 | - download: current 84 | artifact: CompressedFolders 85 | displayName: 'Download packed folders Artifact' 86 | 87 | - powershell: | 88 | $rgn = "$(resourceGroupName)".ToLower() -replace '[^a-z0-9]', '' 89 | $rgn = $rgn.Substring(0, [System.Math]::Min($rgn.Length, 19)) 90 | Write-Host "##vso[task.setvariable variable=resourceGroupName]$rgn" 91 | displayName: 'Assign parsed value to resourceGroupName pipeline variable' 92 | 93 | - powershell: | 94 | $tsan = "$(resourceGroupName)" + "1qe3p" 95 | $tcn = "configs" 96 | Write-Host "##vso[task.setvariable variable=tempStorageAccountName]$tsan" 97 | Write-Host "##vso[task.setvariable variable=tempContainerName]$tcn" 98 | displayName: 'Assign parsed value to tempStorageAccountName and tempContainerName pipeline variables' 99 | 100 | - powershell: | 101 | $san = "$(resourceGroupName)" + "3de90" 102 | $cn = "kafkadata" 103 | Write-Host "##vso[task.setvariable variable=storageAccountName]$san" 104 | Write-Host "##vso[task.setvariable variable=containerName]$cn" 105 | displayName: 'Assign parsed value to storageAccountName and containerName pipeline variables' 106 | 107 | - powershell: | 108 | $fan = "$(resourceGroupName)" + "2xa4c" 109 | Write-Host "##vso[task.setvariable variable=functionAppName]$fan" 110 | displayName: 'Assign parsed value to functionAppName pipeline variable' 111 | 112 | - powershell: | 113 | $apim = "$(resourceGroupName)" + "a1l45" 114 | Write-Host "##vso[task.setvariable variable=apiManagementName]$apim" 115 | displayName: 'Assign parsed value to apiManagementName pipeline variable' 116 | 117 | - powershell: | 118 | $loc = "$(location)" 119 | $loc = $loc.ToLower() -replace ' ', '' 120 | Write-Host "##vso[task.setvariable variable=lowercaseLocation]$loc" 121 | displayName: 'Convert location to lowercase and remove spaces' 122 | 123 | - task: AzureResourceManagerTemplateDeployment@3 124 | displayName: 'Deploy temporary Azure Blob Storage' 125 | inputs: 126 | deploymentScope: 'Resource Group' 127 | azureResourceManagerConnection: $(azconnection) 128 | subscriptionId: $(subscriptionId) 129 | action: 'Create Or Update Resource Group' 130 | resourceGroupName: $(resourceGroupName) 131 | location: $(location) 132 | templateLocation: 'Linked artifact' 133 | csmFile: '$(Build.SourcesDirectory)/arm_templates/blob_storage/temp_blob_storage_template.json' 134 | overrideParameters: '-storageAccountName "$(tempStorageAccountName)" -containerName "$(tempContainerName)"' 135 | deploymentMode: 'Incremental' 136 | 137 | - task: 
AzureCLI@2 138 | displayName: 'Upload tar.gz files to temporary storage' 139 | inputs: 140 | azureSubscription: $(azconnection) 141 | scriptType: 'bash' 142 | scriptLocation: 'inlineScript' 143 | inlineScript: | 144 | az storage blob upload --account-name $(tempStorageAccountName) --container-name $(tempContainerName) --file $(Pipeline.Workspace)/CompressedFolders/airbyte.tar.gz --name airbyte.tar.gz --overwrite true 145 | az storage blob upload --account-name $(tempStorageAccountName) --container-name $(tempContainerName) --file $(Pipeline.Workspace)/CompressedFolders/kafka.tar.gz --name kafka.tar.gz --overwrite true 146 | az storage blob upload --account-name $(tempStorageAccountName) --container-name $(tempContainerName) --file $(Pipeline.Workspace)/CompressedFolders/airflow.tar.gz --name airflow.tar.gz --overwrite true 147 | az storage blob upload --account-name $(tempStorageAccountName) --container-name $(tempContainerName) --file $(Pipeline.Workspace)/CompressedFolders/mlflow.tar.gz --name mlflow.tar.gz --overwrite true 148 | 149 | - task: AzureResourceManagerTemplateDeployment@3 150 | displayName: 'Deploy Azure ML Studio Workspace and dependencies' 151 | inputs: 152 | deploymentScope: 'Resource Group' 153 | azureResourceManagerConnection: $(azconnection) 154 | subscriptionId: $(subscriptionId) 155 | action: 'Create Or Update Resource Group' 156 | resourceGroupName: $(resourceGroupName) 157 | location: $(location) 158 | templateLocation: 'Linked artifact' 159 | csmFile: '$(Build.SourcesDirectory)/arm_templates/mlstudio/azureml_template.json' 160 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/mlstudio/azureml_parameters.json' 161 | deploymentMode: 'Incremental' 162 | 163 | - task: AzureResourceManagerTemplateDeployment@3 164 | displayName: 'Deploy Azure Blob Storage' 165 | inputs: 166 | deploymentScope: 'Resource Group' 167 | azureResourceManagerConnection: $(azconnection) 168 | subscriptionId: $(subscriptionId) 169 | action: 'Create Or Update Resource Group' 170 | resourceGroupName: $(resourceGroupName) 171 | location: $(location) 172 | templateLocation: 'Linked artifact' 173 | csmFile: '$(Build.SourcesDirectory)/arm_templates/blob_storage/blob_storage_template.json' 174 | overrideParameters: '-storageAccountName "$(storageAccountName)" -containerName "$(containerName)"' 175 | deploymentMode: 'Incremental' 176 | 177 | - task: AzureResourceManagerTemplateDeployment@3 178 | displayName: 'Deploy Kafka with its infrastructure' 179 | inputs: 180 | deploymentScope: 'Resource Group' 181 | azureResourceManagerConnection: $(azconnection) 182 | subscriptionId: $(subscriptionId) 183 | action: 'Create Or Update Resource Group' 184 | resourceGroupName: $(resourceGroupName) 185 | location: $(location) 186 | templateLocation: 'Linked artifact' 187 | csmFile: '$(Build.SourcesDirectory)/arm_templates/kafka_infra/kafka_infra_template.json' 188 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/kafka_infra/kafka_infra_parameters.json' 189 | overrideParameters: '-tempStorageAccountName "$(tempStorageAccountName)" -tempContainerName "$(tempContainerName)"' 190 | deploymentMode: 'Incremental' 191 | 192 | - task: AzureResourceManagerTemplateDeployment@3 193 | displayName: 'Deploy Airbyte with its infrastructure' 194 | inputs: 195 | deploymentScope: 'Resource Group' 196 | azureResourceManagerConnection: $(azconnection) 197 | subscriptionId: $(subscriptionId) 198 | action: 'Create Or Update Resource Group' 199 | resourceGroupName: $(resourceGroupName) 200 | location: $(location) 201 
| templateLocation: 'Linked artifact' 202 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airbyte_infra/airbyte_infra_template.json' 203 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/airbyte_infra/airbyte_infra_parameters.json' 204 | overrideParameters: '-tempStorageAccountName "$(tempStorageAccountName)" -tempContainerName "$(tempContainerName)"' 205 | deploymentMode: 'Incremental' 206 | 207 | - task: AzureResourceManagerTemplateDeployment@3 208 | displayName: 'Deploy MLflow with its infrastructure' 209 | inputs: 210 | deploymentScope: 'Resource Group' 211 | azureResourceManagerConnection: $(azconnection) 212 | subscriptionId: $(subscriptionId) 213 | action: 'Create Or Update Resource Group' 214 | resourceGroupName: $(resourceGroupName) 215 | location: $(location) 216 | templateLocation: 'Linked artifact' 217 | csmFile: '$(Build.SourcesDirectory)/arm_templates/mlflow_infra/mlflow_infra_template.json' 218 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/mlflow_infra/mlflow_infra_parameters.json' 219 | overrideParameters: '-tempStorageAccountName "$(tempStorageAccountName)" -tempContainerName "$(tempContainerName)"' 220 | deploymentMode: 'Incremental' 221 | 222 | - task: AzureResourceManagerTemplateDeployment@3 223 | displayName: 'Deploy Airflow with its infrastructure' 224 | inputs: 225 | deploymentScope: 'Resource Group' 226 | azureResourceManagerConnection: $(azconnection) 227 | subscriptionId: $(subscriptionId) 228 | action: 'Create Or Update Resource Group' 229 | resourceGroupName: $(resourceGroupName) 230 | location: $(location) 231 | templateLocation: 'Linked artifact' 232 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_infra_template.json' 233 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_infra_parameters.json' 234 | overrideParameters: '-tempStorageAccountName "$(tempStorageAccountName)" -tempContainerName "$(tempContainerName)"' 235 | deploymentMode: 'Incremental' 236 | 237 | - task: AzureResourceManagerTemplateDeployment@3 238 | displayName: 'Deploy VNet Peering between Airbyte and Kafka' 239 | inputs: 240 | deploymentScope: 'Resource Group' 241 | azureResourceManagerConnection: $(azconnection) 242 | subscriptionId: $(subscriptionId) 243 | action: 'Create Or Update Resource Group' 244 | resourceGroupName: $(resourceGroupName) 245 | location: $(location) 246 | templateLocation: 'Linked artifact' 247 | csmFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/vnet_peering_template.json' 248 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/airbyte_kafka_vnet_peering_parameters.json' 249 | deploymentMode: 'Incremental' 250 | 251 | - task: AzureResourceManagerTemplateDeployment@3 252 | displayName: 'Deploy VNet Peering between Kafka and Airflow' 253 | inputs: 254 | deploymentScope: 'Resource Group' 255 | azureResourceManagerConnection: $(azconnection) 256 | subscriptionId: $(subscriptionId) 257 | action: 'Create Or Update Resource Group' 258 | resourceGroupName: $(resourceGroupName) 259 | location: $(location) 260 | templateLocation: 'Linked artifact' 261 | csmFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/vnet_peering_template.json' 262 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/kafka_airflow_vnet_peering_parameters.json' 263 | deploymentMode: 'Incremental' 264 | 265 | - task: AzureResourceManagerTemplateDeployment@3 266 | displayName: 'Deploy VNet Peering between Airflow and MLflow' 267 | inputs: 268 | 
deploymentScope: 'Resource Group' 269 | azureResourceManagerConnection: $(azconnection) 270 | subscriptionId: $(subscriptionId) 271 | action: 'Create Or Update Resource Group' 272 | resourceGroupName: $(resourceGroupName) 273 | location: $(location) 274 | templateLocation: 'Linked artifact' 275 | csmFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/vnet_peering_template.json' 276 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/vnet_peerings/airflow_mlflow_vnet_peering_parameters.json' 277 | deploymentMode: 'Incremental' 278 | 279 | - task: AzureResourceManagerTemplateDeployment@3 280 | displayName: 'Deploy Private Endpoint from Airflow to Azure Blob Storage' 281 | inputs: 282 | deploymentScope: 'Resource Group' 283 | azureResourceManagerConnection: $(azconnection) 284 | subscriptionId: $(subscriptionId) 285 | action: 'Create Or Update Resource Group' 286 | resourceGroupName: $(resourceGroupName) 287 | location: $(location) 288 | templateLocation: 'Linked artifact' 289 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_storage_private_endpoint_template.json' 290 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_storage_private_endpoint_parameters.json' 291 | overrideParameters: '-storageAccountName "$(storageAccountName)"' 292 | deploymentMode: 'Incremental' 293 | 294 | - task: AzureResourceManagerTemplateDeployment@3 295 | displayName: 'Deploy Private Endpoint from Airflow to Azure ML Studio Workspace' 296 | inputs: 297 | deploymentScope: 'Resource Group' 298 | azureResourceManagerConnection: $(azconnection) 299 | subscriptionId: $(subscriptionId) 300 | action: 'Create Or Update Resource Group' 301 | resourceGroupName: $(resourceGroupName) 302 | location: $(location) 303 | templateLocation: 'Linked artifact' 304 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_mlstudio_private_endpoint_template.json' 305 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_mlstudio_private_endpoint_parameters.json' 306 | deploymentMode: 'Incremental' 307 | 308 | - task: AzureResourceManagerTemplateDeployment@3 309 | displayName: 'Deploy private DNS zone to connect Airflow to Azure Blob Storage' 310 | inputs: 311 | deploymentScope: 'Resource Group' 312 | azureResourceManagerConnection: $(azconnection) 313 | subscriptionId: $(subscriptionId) 314 | action: 'Create Or Update Resource Group' 315 | resourceGroupName: $(resourceGroupName) 316 | location: $(location) 317 | templateLocation: 'Linked artifact' 318 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/blob_storage_private_link_template.json' 319 | overrideParameters: '-storageAccountName "$(storageAccountName)"' 320 | deploymentMode: 'Incremental' 321 | 322 | - task: AzurePowerShell@5 323 | displayName: 'Assign Azure ML Workspace ID to pipeline variable' 324 | inputs: 325 | azureSubscription: $(azconnection) 326 | ScriptType: 'InlineScript' 327 | Inline: | 328 | $workspace = Get-AzResource -ResourceGroupName "$(resourceGroupName)" -ResourceType "Microsoft.MachineLearningServices/workspaces" -Name "mlserving" 329 | $wid = $workspace.Properties.workspaceId 330 | Write-Host "##vso[task.setvariable variable=workspaceId]$wid" 331 | azurePowerShellVersion: 'LatestVersion' 332 | 333 | - task: AzureResourceManagerTemplateDeployment@3 334 | displayName: 'Deploy private DNS zone to connect Airflow to Azure ML Studio Workspace' 335 | inputs: 336 | deploymentScope: 'Resource Group' 337 | azureResourceManagerConnection: 
$(azconnection) 338 | subscriptionId: $(subscriptionId) 339 | action: 'Create Or Update Resource Group' 340 | resourceGroupName: $(resourceGroupName) 341 | location: $(location) 342 | templateLocation: 'Linked artifact' 343 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/azureml_private_link_template.json' 344 | overrideParameters: '-mlworkspaceId "$(workspaceId)"' 345 | deploymentMode: 'Incremental' 346 | 347 | - task: AzureResourceManagerTemplateDeployment@3 348 | displayName: 'Deploy Virtual Network for Function App' 349 | inputs: 350 | deploymentScope: 'Resource Group' 351 | azureResourceManagerConnection: $(azconnection) 352 | subscriptionId: $(subscriptionId) 353 | action: 'Create Or Update Resource Group' 354 | resourceGroupName: $(resourceGroupName) 355 | location: $(location) 356 | templateLocation: 'Linked artifact' 357 | csmFile: '$(Build.SourcesDirectory)/arm_templates/function_app/functionapp_vnet_template.json' 358 | deploymentMode: 'Incremental' 359 | 360 | - task: AzureResourceManagerTemplateDeployment@3 361 | displayName: 'Deploy Azure Function App' 362 | inputs: 363 | deploymentScope: 'Resource Group' 364 | azureResourceManagerConnection: $(azconnection) 365 | subscriptionId: '$(subscriptionId)' 366 | action: 'Create Or Update Resource Group' 367 | resourceGroupName: $(resourceGroupName) 368 | location: $(location) 369 | templateLocation: 'Linked artifact' 370 | csmFile: '$(Build.SourcesDirectory)/arm_templates/function_app/function_app_template.json' 371 | overrideParameters: '-name "$(functionAppName)"' 372 | deploymentMode: 'Incremental' 373 | 374 | - task: AzureFunctionApp@2 375 | displayName: 'Deploy packed function to Azure Function App' 376 | inputs: 377 | connectedServiceNameARM: $(azconnection) 378 | appType: 'functionAppLinux' 379 | appName: $(functionAppName) 380 | package: '$(Pipeline.Workspace)/FunctionAppArtifact/functionapp.zip' 381 | runtimeStack: 'PYTHON|3.10' 382 | deploymentMethod: 'auto' 383 | 384 | - task: AzureResourceManagerTemplateDeployment@3 385 | displayName: 'Deploy Private Endpoint from Function App to Azure ML Studio Workspace' 386 | inputs: 387 | deploymentScope: 'Resource Group' 388 | azureResourceManagerConnection: $(azconnection) 389 | subscriptionId: $(subscriptionId) 390 | action: 'Create Or Update Resource Group' 391 | resourceGroupName: $(resourceGroupName) 392 | location: $(location) 393 | templateLocation: 'Linked artifact' 394 | csmFile: '$(Build.SourcesDirectory)/arm_templates/airflow_infra/airflow_mlstudio_private_endpoint_template.json' 395 | csmParametersFile: '$(Build.SourcesDirectory)/arm_templates/function_app/function_azureml_private_endpoint_parameters.json' 396 | deploymentMode: 'Incremental' 397 | 398 | - task: AzureResourceManagerTemplateDeployment@3 399 | displayName: 'Deploy private DNS zone to connect Function App to Azure ML Studio Workspace' 400 | inputs: 401 | deploymentScope: 'Resource Group' 402 | azureResourceManagerConnection: $(azconnection) 403 | subscriptionId: $(subscriptionId) 404 | action: 'Create Or Update Resource Group' 405 | resourceGroupName: '$(resourceGroupName)functionappdns' 406 | location: $(location) 407 | templateLocation: 'Linked artifact' 408 | csmFile: '$(Build.SourcesDirectory)/arm_templates/function_app/function_azureml_private_link_template.json' 409 | overrideParameters: '-mlworkspaceId "$(workspaceId)" -resourceGroup "$(resourceGroupName)"' 410 | deploymentMode: 'Incremental' 411 | 412 | - task: AzurePowerShell@5 413 | displayName: 'Roles assignment' 414 | inputs: 
415 | azureSubscription: $(azconnection) 416 | ScriptType: 'FilePath' 417 | ScriptPath: 'scripts/roles_assignment.ps1' 418 | ScriptArguments: '-resourceGroupName "$(resourceGroupName)" -storageAccountName "$(storageAccountName)" -functionAppName "$(functionAppName)"' 419 | azurePowerShellVersion: 'LatestVersion' 420 | 421 | - script: chmod +x function_api/apimanagement.sh 422 | displayName: 'Make apimanagement.sh executable' 423 | 424 | - task: AzureCLI@2 425 | displayName: 'Deploy API Management service and API for Function App' 426 | inputs: 427 | azureSubscription: $(azconnection) 428 | scriptType: 'bash' 429 | scriptLocation: 'inlineScript' 430 | inlineScript: | 431 | cd function_api 432 | ./apimanagement.sh $(resourceGroupName) $(lowercaseLocation) $(apiManagementName) $(functionAppName) 433 | 434 | - task: AzurePowerShell@5 435 | displayName: 'Inbound policy assignment to function API' 436 | inputs: 437 | azureSubscription: $(azconnection) 438 | ScriptType: 'InlineScript' 439 | Inline: | 440 | $apimContext = New-AzApiManagementContext -ResourceGroupName "$(resourceGroupName)" -ServiceName "$(apiManagementName)" 441 | $policy = "<policies>" + 442 | "<inbound>" + 443 | "<base />" + 444 | "<set-header name='x-functions-key' exists-action='override'>" + 445 | "<value>{{FunctionKey}}</value>" + 446 | "</set-header>" + 447 | "<set-header name='Content-Type' exists-action='override'>" + 448 | "<value>application/json</value>" + 449 | "</set-header>" + 450 | "</inbound>" + 451 | "<backend>" + 452 | "<base />" + 453 | "</backend>" + 454 | "<outbound>" + 455 | "<base />" + 456 | "</outbound>" + 457 | "<on-error>" + 458 | "<base />" + 459 | "</on-error></policies>" 460 | Set-AzApiManagementPolicy -Context $apimContext -ApiId "function" -Policy $policy -OperationId "post-operation" 461 | azurePowerShellVersion: 'LatestVersion' 462 | -------------------------------------------------------------------------------- /arm_templates/mlstudio/azureml_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "type": "String", 7 | "metadata": { 8 | "description": "Specifies the name of the Azure Machine Learning workspace." 9 | } 10 | }, 11 | "kind": { 12 | "defaultValue": "Default", 13 | "allowedValues": [ 14 | "Default", 15 | "FeatureStore", 16 | "Hub", 17 | "Project" 18 | ], 19 | "type": "String" 20 | }, 21 | "appInsightsLogWorkspaceName": { 22 | "defaultValue": "[concat('ai', uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 23 | "type": "String", 24 | "metadata": { 25 | "description": "Specifies log workspace name of the log workspace created for the Application Insights." 26 | } 27 | }, 28 | "sku": { 29 | "defaultValue": "Basic", 30 | "allowedValues": [ 31 | "Basic", 32 | "Enterprise" 33 | ], 34 | "type": "String", 35 | "metadata": { 36 | "description": "Specifies the sku, also referred as 'edition' of the Azure Machine Learning workspace." 37 | } 38 | }, 39 | "identityType": { 40 | "defaultValue": "systemAssigned", 41 | "allowedValues": [ 42 | "systemAssigned", 43 | "userAssigned" 44 | ], 45 | "type": "String", 46 | "metadata": { 47 | "description": "Specifies the identity type of the Azure Machine Learning workspace." 48 | } 49 | }, 50 | "primaryUserAssignedIdentityResourceGroup": { 51 | "defaultValue": "[variables('resourceGroupName')]", 52 | "type": "String", 53 | "metadata": { 54 | "description": "Specifies the resource group of user assigned identity that represents the Azure Machine Learning workspace."
55 | } 56 | }, 57 | "primaryUserAssignedIdentityName": { 58 | "defaultValue": "", 59 | "type": "String", 60 | "metadata": { 61 | "description": "Specifies the name of user assigned identity that represents the Azure Machine Learning workspace." 62 | } 63 | }, 64 | "storageAccountOption": { 65 | "defaultValue": "new", 66 | "allowedValues": [ 67 | "new", 68 | "existing" 69 | ], 70 | "type": "String", 71 | "metadata": { 72 | "description": "Determines whether or not a new storage should be provisioned." 73 | } 74 | }, 75 | "storageAccountName": { 76 | "defaultValue": "[concat('sa', uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 77 | "type": "String", 78 | "metadata": { 79 | "description": "Name of the storage account." 80 | } 81 | }, 82 | "storageAccountType": { 83 | "defaultValue": "Standard_LRS", 84 | "allowedValues": [ 85 | "Standard_LRS", 86 | "Standard_GRS", 87 | "Standard_RAGRS", 88 | "Standard_ZRS", 89 | "Standard_GZRS", 90 | "Standard_RAGZRS" 91 | ], 92 | "type": "String" 93 | }, 94 | "storageAccountBehindVNet": { 95 | "defaultValue": "false", 96 | "allowedValues": [ 97 | "true", 98 | "false" 99 | ], 100 | "type": "String", 101 | "metadata": { 102 | "description": "Determines whether or not to put the storage account behind VNet" 103 | } 104 | }, 105 | "storageAccountResourceGroupName": { 106 | "defaultValue": "[variables('resourceGroupName')]", 107 | "type": "String" 108 | }, 109 | "storageAccountLocation": { 110 | "defaultValue": "[variables('location')]", 111 | "type": "String" 112 | }, 113 | "storageAccountHnsEnabled": { 114 | "defaultValue": false, 115 | "type": "Bool" 116 | }, 117 | "keyVaultOption": { 118 | "defaultValue": "new", 119 | "allowedValues": [ 120 | "new", 121 | "existing" 122 | ], 123 | "type": "String", 124 | "metadata": { 125 | "description": "Determines whether or not a new key vault should be provisioned." 126 | } 127 | }, 128 | "keyVaultName": { 129 | "defaultValue": "[concat('kv', uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 130 | "type": "String", 131 | "metadata": { 132 | "description": "Name of the key vault." 133 | } 134 | }, 135 | "keyVaultBehindVNet": { 136 | "defaultValue": "false", 137 | "allowedValues": [ 138 | "true", 139 | "false" 140 | ], 141 | "type": "String", 142 | "metadata": { 143 | "description": "Determines whether or not to put the key vault behind VNet" 144 | } 145 | }, 146 | "keyVaultResourceGroupName": { 147 | "defaultValue": "[variables('resourceGroupName')]", 148 | "type": "String" 149 | }, 150 | "keyVaultLocation": { 151 | "defaultValue": "[variables('location')]", 152 | "type": "String" 153 | }, 154 | "applicationInsightsOption": { 155 | "defaultValue": "new", 156 | "allowedValues": [ 157 | "new", 158 | "existing", 159 | "none" 160 | ], 161 | "type": "String", 162 | "metadata": { 163 | "description": "Determines whether or not new ApplicationInsights should be provisioned." 164 | } 165 | }, 166 | "applicationInsightsName": { 167 | "defaultValue": "[concat('ai', uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 168 | "type": "String", 169 | "metadata": { 170 | "description": "Name of ApplicationInsights."
171 | } 172 | }, 173 | "applicationInsightsResourceGroupName": { 174 | "defaultValue": "[variables('resourceGroupName')]", 175 | "type": "String" 176 | }, 177 | "applicationInsightsLocation": { 178 | "defaultValue": "[variables('location')]", 179 | "type": "String" 180 | }, 181 | "containerRegistryOption": { 182 | "defaultValue": "none", 183 | "allowedValues": [ 184 | "new", 185 | "existing", 186 | "none" 187 | ], 188 | "type": "String", 189 | "metadata": { 190 | "description": "Determines whether or not a new container registry should be provisioned." 191 | } 192 | }, 193 | "containerRegistryName": { 194 | "defaultValue": "[concat('cr', uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 195 | "type": "String", 196 | "metadata": { 197 | "description": "The container registry bound to the workspace." 198 | } 199 | }, 200 | "containerRegistrySku": { 201 | "defaultValue": "Premium", 202 | "allowedValues": [ 203 | "Basic", 204 | "Standard", 205 | "Premium" 206 | ], 207 | "type": "String" 208 | }, 209 | "containerRegistryResourceGroupName": { 210 | "defaultValue": "[variables('resourceGroupName')]", 211 | "type": "String" 212 | }, 213 | "containerRegistryBehindVNet": { 214 | "defaultValue": "false", 215 | "allowedValues": [ 216 | "true", 217 | "false" 218 | ], 219 | "type": "String", 220 | "metadata": { 221 | "description": "Determines whether or not to put container registry behind VNet." 222 | } 223 | }, 224 | "containerRegistryLocation": { 225 | "defaultValue": "[variables('location')]", 226 | "type": "String" 227 | }, 228 | "vnetOption": { 229 | "defaultValue": "[if(equals(parameters('privateEndpointType'), 'none'), 'none', 'new')]", 230 | "allowedValues": [ 231 | "new", 232 | "existing", 233 | "none" 234 | ], 235 | "type": "String", 236 | "metadata": { 237 | "description": "Determines whether or not a new VNet should be provisioned." 238 | } 239 | }, 240 | "vnetName": { 241 | "defaultValue": "[concat('vn',uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 242 | "type": "String", 243 | "metadata": { 244 | "description": "Name of the VNet" 245 | } 246 | }, 247 | "vnetResourceGroupName": { 248 | "defaultValue": "[variables('resourceGroupName')]", 249 | "type": "String" 250 | }, 251 | "addressPrefixes": { 252 | "defaultValue": [ 253 | "10.0.0.0/16" 254 | ], 255 | "type": "Array", 256 | "metadata": { 257 | "description": "Address prefix of the virtual network" 258 | } 259 | }, 260 | "subnetOption": { 261 | "defaultValue": "[if(or(not(equals(parameters('privateEndpointType'), 'none')), equals(parameters('vnetOption'), 'new')), 'new', 'none')]", 262 | "allowedValues": [ 263 | "new", 264 | "existing", 265 | "none" 266 | ], 267 | "type": "String", 268 | "metadata": { 269 | "description": "Determines whether or not a new subnet should be provisioned."
270 | } 271 | }, 272 | "subnetName": { 273 | "defaultValue": "[concat('sn',uniqueString(variables('resourceGroupName'), parameters('workspaceName')))]", 274 | "type": "String", 275 | "metadata": { 276 | "description": "Name of the subnet" 277 | } 278 | }, 279 | "subnetPrefix": { 280 | "defaultValue": "10.0.0.0/24", 281 | "type": "String", 282 | "metadata": { 283 | "description": "Subnet prefix of the virtual network" 284 | } 285 | }, 286 | "adbWorkspace": { 287 | "defaultValue": "", 288 | "type": "String", 289 | "metadata": { 290 | "description": "Azure Databrick workspace to be linked to the workspace" 291 | } 292 | }, 293 | "confidential_data": { 294 | "defaultValue": "false", 295 | "allowedValues": [ 296 | "false", 297 | "true" 298 | ], 299 | "type": "String", 300 | "metadata": { 301 | "description": "Specifies that the Azure Machine Learning workspace holds highly confidential data." 302 | } 303 | }, 304 | "encryption_status": { 305 | "defaultValue": "Disabled", 306 | "allowedValues": [ 307 | "Enabled", 308 | "Disabled" 309 | ], 310 | "type": "String", 311 | "metadata": { 312 | "description": "Specifies if the Azure Machine Learning workspace should be encrypted with customer managed key." 313 | } 314 | }, 315 | "cmk_keyvault": { 316 | "defaultValue": "", 317 | "type": "String", 318 | "metadata": { 319 | "description": "Specifies the customer managed keyVault arm id." 320 | } 321 | }, 322 | "resource_cmk_uri": { 323 | "defaultValue": "", 324 | "type": "String", 325 | "metadata": { 326 | "description": "Specifies the customer managed keyvault key uri." 327 | } 328 | }, 329 | "privateEndpointType": { 330 | "defaultValue": "none", 331 | "allowedValues": [ 332 | "AutoApproval", 333 | "ManualApproval", 334 | "none" 335 | ], 336 | "type": "String" 337 | }, 338 | "tagValues": { 339 | "defaultValue": {}, 340 | "type": "Object" 341 | }, 342 | "privateEndpointName": { 343 | "defaultValue": "pe", 344 | "type": "String", 345 | "metadata": { 346 | "description": "Name of the private end point added to the workspace" 347 | } 348 | }, 349 | "privateEndpointResourceGroupName": { 350 | "defaultValue": "[variables('resourceGroupName')]", 351 | "type": "String", 352 | "metadata": { 353 | "description": "Name of the resource group where the private end point is added to" 354 | } 355 | }, 356 | "privateEndpointSubscription": { 357 | "defaultValue": "[subscription().subscriptionId]", 358 | "type": "String", 359 | "metadata": { 360 | "description": "Id of the subscription where the private end point is added to" 361 | } 362 | }, 363 | "systemDatastoresAuthMode": { 364 | "defaultValue": "accessKey", 365 | "type": "String", 366 | "metadata": { 367 | "description": "Identity type of storage account services." 368 | } 369 | }, 370 | "managedNetwork": { 371 | "defaultValue": { 372 | "isolationMode": "AllowInternetOutbound" 373 | }, 374 | "type": "Object", 375 | "metadata": { 376 | "description": "Managed network settings to be used for the workspace. If not specified, isolation mode Disabled is the default" 377 | } 378 | }, 379 | "publicNetworkAccess": { 380 | "defaultValue": "Disabled", 381 | "type": "String", 382 | "metadata": { 383 | "description": "Specifies whether the workspace can be accessed by public networks or not."
384 | } 385 | } 386 | }, 387 | "variables": { 388 | "resourceGroupName": "[resourceGroup().name]", 389 | "location": "[resourceGroup().location]", 390 | "tenantId": "[subscription().tenantId]", 391 | "storageAccount": "[resourceId(parameters('storageAccountResourceGroupName'), 'Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]", 392 | "keyVault": "[resourceId(parameters('keyVaultResourceGroupName'), 'Microsoft.KeyVault/vaults', parameters('keyVaultName'))]", 393 | "containerRegistry": "[resourceId(parameters('containerRegistryResourceGroupName'), 'Microsoft.ContainerRegistry/registries', parameters('containerRegistryName'))]", 394 | "applicationInsights": "[resourceId(parameters('applicationInsightsResourceGroupName'), 'Microsoft.Insights/components', parameters('applicationInsightsName'))]", 395 | "subnet": "[resourceId(parameters('privateEndpointSubscription'), parameters('vnetResourceGroupName'), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('subnetName'))]", 396 | "networkRuleSetBehindVNet": { 397 | "defaultAction": "deny", 398 | "virtualNetworkRules": [ 399 | { 400 | "action": "Allow", 401 | "id": "[variables('subnet')]" 402 | } 403 | ] 404 | }, 405 | "privateEndpointSettings": { 406 | "name": "[concat(parameters('workspaceName'), '-PrivateEndpoint')]", 407 | "properties": { 408 | "privateLinkServiceId": "[resourceId('Microsoft.MachineLearningServices/workspaces', parameters('workspaceName'))]", 409 | "groupIds": [ 410 | "amlworkspace" 411 | ] 412 | } 413 | }, 414 | "defaultPEConnections": "[array(variables('privateEndpointSettings'))]", 415 | "privateEndpointDeploymentName": "[concat('DeployPrivateEndpoint-', uniqueString(parameters('privateEndpointName')))]", 416 | "userAssignedIdentities": { 417 | "[variables('primaryUserAssignedIdentity')]": {} 418 | }, 419 | "primaryUserAssignedIdentity": "[resourceId(parameters('primaryUserAssignedIdentityResourceGroup'), 'Microsoft.ManagedIdentity/userAssignedIdentities', parameters('primaryUserAssignedIdentityName'))]" 420 | }, 421 | "resources": [ 422 | { 423 | "type": "Microsoft.Storage/storageAccounts", 424 | "apiVersion": "2019-04-01", 425 | "name": "[parameters('storageAccountName')]", 426 | "location": "[parameters('storageAccountLocation')]", 427 | "tags": "[parameters('tagValues')]", 428 | "sku": { 429 | "name": "[parameters('storageAccountType')]" 430 | }, 431 | "kind": "StorageV2", 432 | "properties": { 433 | "encryption": { 434 | "services": { 435 | "blob": { 436 | "enabled": true 437 | }, 438 | "file": { 439 | "enabled": true 440 | } 441 | }, 442 | "keySource": "Microsoft.Storage" 443 | }, 444 | "supportsHttpsTrafficOnly": true, 445 | "allowBlobPublicAccess": false, 446 | "networkAcls": "[if(equals(parameters('storageAccountBehindVNet'), 'true'), variables('networkRuleSetBehindVNet'), json('null'))]", 447 | "isHnsEnabled": "[parameters('storageAccountHnsEnabled')]", 448 | "minimumTlsVersion": "TLS1_2" 449 | }, 450 | "condition": "[equals(parameters('storageAccountOption'), 'new')]" 451 | }, 452 | { 453 | "type": "Microsoft.KeyVault/vaults", 454 | "apiVersion": "2019-09-01", 455 | "name": "[parameters('keyVaultName')]", 456 | "location": "[parameters('keyVaultLocation')]", 457 | "tags": "[parameters('tagValues')]", 458 | "properties": { 459 | "tenantId": "[variables('tenantId')]", 460 | "sku": { 461 | "name": "standard", 462 | "family": "A" 463 | }, 464 | "accessPolicies": [], 465 | "networkAcls": "[if(equals(parameters('keyVaultBehindVNet'), 'true'), 
variables('networkRuleSetBehindVNet'), json('null'))]" 466 | }, 467 | "condition": "[equals(parameters('keyVaultOption'), 'new')]" 468 | }, 469 | { 470 | "type": "Microsoft.ContainerRegistry/registries", 471 | "apiVersion": "2019-05-01", 472 | "name": "[parameters('containerRegistryName')]", 473 | "location": "[parameters('containerRegistryLocation')]", 474 | "tags": "[parameters('tagValues')]", 475 | "sku": { 476 | "name": "[parameters('containerRegistrySku')]" 477 | }, 478 | "properties": { 479 | "adminUserEnabled": true, 480 | "networkRuleSet": "[if(equals(parameters('containerRegistryBehindVNet'), 'true'), variables('networkRuleSetBehindVNet'), json('null'))]" 481 | }, 482 | "condition": "[equals(parameters('containerRegistryOption'), 'new')]" 483 | }, 484 | { 485 | "type": "Microsoft.OperationalInsights/workspaces", 486 | "apiVersion": "2020-08-01", 487 | "name": "[parameters('appInsightsLogWorkspaceName')]", 488 | "location": "[parameters('applicationInsightsLocation')]", 489 | "tags": "[parameters('tagValues')]", 490 | "condition": "[equals(parameters('applicationInsightsOption'), 'new')]" 491 | }, 492 | { 493 | "type": "Microsoft.Insights/components", 494 | "apiVersion": "2020-02-02-preview", 495 | "name": "[parameters('applicationInsightsName')]", 496 | "location": "[parameters('applicationInsightsLocation')]", 497 | "dependsOn": [ 498 | "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('appInsightsLogWorkspaceName'))]" 499 | ], 500 | "tags": "[parameters('tagValues')]", 501 | "properties": { 502 | "ApplicationId": "[parameters('applicationInsightsName')]", 503 | "Application_Type": "web", 504 | "Flow_Type": "Redfield", 505 | "Request_Source": "IbizaMachineLearningExtension", 506 | "WorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('appInsightsLogWorkspaceName'))]" 507 | }, 508 | "condition": "[equals(parameters('applicationInsightsOption'), 'new')]" 509 | }, 510 | { 511 | "type": "Microsoft.MachineLearningServices/workspaces", 512 | "apiVersion": "2022-12-01-preview", 513 | "name": "[parameters('workspaceName')]", 514 | "location": "[variables('location')]", 515 | "dependsOn": [ 516 | "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]", 517 | "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]", 518 | "[resourceId('Microsoft.Insights/components', parameters('applicationInsightsName'))]", 519 | "[resourceId('Microsoft.ContainerRegistry/registries', parameters('containerRegistryName'))]" 520 | ], 521 | "tags": "[parameters('tagValues')]", 522 | "sku": { 523 | "tier": "[parameters('sku')]", 524 | "name": "[parameters('sku')]" 525 | }, 526 | "kind": "[parameters('kind')]", 527 | "identity": { 528 | "type": "[parameters('identityType')]", 529 | "userAssignedIdentities": "[if(equals(parameters('identityType'), 'userAssigned'), variables('userAssignedIdentities'), json('null'))]" 530 | }, 531 | "properties": { 532 | "friendlyName": "[parameters('workspaceName')]", 533 | "description": "", 534 | "storageAccount": "[variables('storageAccount')]", 535 | "keyVault": "[variables('keyVault')]", 536 | "applicationInsights": "[if(not(equals(parameters('applicationInsightsOption'), 'none')), variables('applicationInsights'), json('null'))]", 537 | "containerRegistry": "[if(not(equals(parameters('containerRegistryOption'), 'none')), variables('containerRegistry'), json('null'))]", 538 | "primaryUserAssignedIdentity": "[if(equals(parameters('identityType'), 'userAssigned'), 
variables('primaryUserAssignedIdentity'), json('null'))]", 539 | "systemDatastoresAuthMode": "[if(not(equals(parameters('systemDatastoresAuthMode'), 'accessKey')), parameters('systemDatastoresAuthMode'), json('null'))]", 540 | "managedNetwork": "[parameters('managedNetwork')]", 541 | "publicNetworkAccess": "[parameters('publicNetworkAccess')]" 542 | } 543 | }, 544 | { 545 | "type": "Microsoft.Resources/deployments", 546 | "apiVersion": "2020-06-01", 547 | "name": "[variables('privateEndpointDeploymentName')]", 548 | "dependsOn": [ 549 | "[resourceId('Microsoft.MachineLearningServices/workspaces', parameters('workspaceName'))]" 550 | ], 551 | "properties": { 552 | "mode": "Incremental", 553 | "template": { 554 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 555 | "contentVersion": "1.0.0.0", 556 | "resources": [ 557 | { 558 | "apiVersion": "2020-06-01", 559 | "name": "[parameters('privateEndpointName')]", 560 | "type": "Microsoft.Network/privateEndpoints", 561 | "location": "[variables('location')]", 562 | "tags": "[parameters('tagValues')]", 563 | "properties": { 564 | "privateLinkServiceConnections": "[if(equals(parameters('privateEndpointType'), 'AutoApproval'), variables('defaultPEConnections'), json('null'))]", 565 | "manualPrivateLinkServiceConnections": "[if(equals(parameters('privateEndpointType'), 'ManualApproval'), variables('defaultPEConnections'), json('null'))]", 566 | "subnet": { 567 | "id": "[variables('subnet')]" 568 | } 569 | } 570 | } 571 | ] 572 | } 573 | }, 574 | "subscriptionId": "[parameters('privateEndpointSubscription')]", 575 | "resourceGroup": "[parameters('privateEndpointResourceGroupName')]", 576 | "condition": "[not(equals(parameters('privateEndpointType'), 'none'))]" 577 | } 578 | ], 579 | "outputs": {} 580 | } 581 | --------------------------------------------------------------------------------