├── .gitignore ├── sample_dags │ ├── email_on_failure.py │ ├── Branched_S3.py │ └── collect_hourly_data_from_api.py ├── README.md └── AirflowTemplate.yml /.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | .pydevproject 3 | -------------------------------------------------------------------------------- /sample_dags/email_on_failure.py: -------------------------------------------------------------------------------- 1 | """An example DAG that sends an alert email to specified addresses if a DAG run fails.""" 2 | 3 | from airflow import DAG 4 | from airflow.operators.python_operator import PythonOperator 5 | from datetime import datetime 6 | from datetime import timedelta 7 | 8 | 9 | # Airflow can send alert emails to the user if a DAG run fails. Instructions on how to configure 10 | # the email system can be found in https://github.com/CSCfi/airflow-openshift/blob/master/README.md 11 | 12 | 13 | default_args = { 14 | 'owner': 'your-username', 15 | 'start_date': datetime(2020, 1, 1), 16 | 'email': ['your-email@email.com', 17 | 'another-recipient@email.com'], # You can specify all recipients of the email as a list 18 | 'email_on_failure': True, # email_on_failure has to be set to True either in the DAG or in the individual task 19 | 'email_on_retry': True, # you can also specify whether you want to get an email when a task is retried 20 | 'retries': 1, 21 | 'retry_delay': timedelta(seconds=10), 22 | } 23 | 24 | 25 | def failing_function(): 26 | raise Exception('Failing or retried task should result in Airflow sending you an email') 27 | 28 | 29 | with DAG(dag_id='email_on_failure', default_args=default_args, schedule_interval='@once') as dag: 30 | 31 | failing_task = PythonOperator( 32 | task_id='failing_task', 33 | python_callable=failing_function 34 | ) 35 | -------------------------------------------------------------------------------- /sample_dags/Branched_S3.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.operators.python_operator import PythonOperator 3 | from airflow.operators.bash_operator import BashOperator 4 | from airflow.operators.python_operator import BranchPythonOperator 5 | from airflow.sensors.s3_key_sensor import S3KeySensor 6 | from datetime import datetime, timedelta 7 | from airflow.hooks.S3_hook import S3Hook 8 | import os 9 | 10 | default_args = { 11 | 'owner': 'your-username', 12 | 'start_date': datetime(2020, 1, 1), 13 | 'retry_delay': timedelta(minutes=5) 14 | } 15 | 16 | MAIN_DAG_NAME = 'Branch_Pipeline' 17 | 18 | # The directory /tmp/airflow is created by default by the template, for temporary file storage shared across all the workers 19 | 20 | def path_picker(): 21 | for fname in os.listdir('/tmp/airflow/'): 22 | if fname.endswith('.py'): 23 | return "branch_python" 24 | else: 25 | return "branch_java" 26 | 27 | 28 | def download_file_from_s3(bucket_name, key, filename): 29 | 30 | hook = S3Hook('s3conn') # You need to create the connection in the Airflow UI first!
31 | client = hook.get_conn() 32 | client.download_file( 33 | Bucket=bucket_name, 34 | Key=key, 35 | Filename=filename 36 | ) 37 | 38 | 39 | # def upload_file_to_s3(bucket_name, key, filename): 40 | # localpath = '/tmp/airflow' 41 | # hook = S3Hook('s3conn') 42 | # hook.load_file( 43 | # filename=localpath + filename, 44 | # key=key, 45 | # bucket_name=bucket_name) 46 | 47 | 48 | # Using the context manager allows you to avoid duplicating the dag parameter in each operator 49 | with DAG(MAIN_DAG_NAME, default_args=default_args, schedule_interval='@once') as main_dag: 50 | 51 | # Parameters required in S3: 52 | # 1. bucket_name: You need to create a bucket in your S3 first 53 | # 2. bucket_key (as a prefix): for example, if your file is called datafile.py, the key can be any matching prefix pattern, such as dat* 54 | # 3. aws_conn_id: you need to create this via the Airflow UI first 55 | 56 | inputsensor = S3KeySensor( 57 | task_id='check_s3_for_file_in_s3', 58 | bucket_key='dat*', 59 | wildcard_match=True, 60 | bucket_name='your-bucket-name', 61 | aws_conn_id='s3conn', 62 | timeout=18 * 60 * 60, 63 | poke_interval=30, 64 | dag=main_dag) 65 | 66 | 67 | # Parameters required in S3: 68 | # 1. bucket_name: You need to create a bucket in your S3 first 69 | # 2. key: a file called 'datafile.py' should be present in your bucket 70 | 71 | download_file_from_S3_task = PythonOperator( 72 | task_id='download_file_from_S3', 73 | depends_on_past=True, 74 | python_callable=download_file_from_s3, 75 | op_kwargs={ 76 | 'filename': '/tmp/airflow/datafile.py', # this will store it in the temp location created by default 77 | 'key': 'datafile.py', # This file needs to be present in your S3 bucket; it can be empty as well 78 | 'bucket_name': 'your-bucket-name', 79 | }, 80 | dag=main_dag 81 | ) 82 | 83 | branch_task = BranchPythonOperator( 84 | task_id='run_this_first', 85 | python_callable=path_picker, 86 | trigger_rule="all_done", 87 | dag=main_dag) 88 | 89 | branch_python = BashOperator( 90 | task_id='branch_python', 91 | depends_on_past=True, 92 | bash_command='echo Python task called', 93 | dag=main_dag) 94 | 95 | branch_java = BashOperator( 96 | task_id='branch_java', 97 | depends_on_past=True, 98 | bash_command='echo Java task called', 99 | dag=main_dag) 100 | 101 | # Use arrows to set dependencies between tasks 102 | inputsensor >> download_file_from_S3_task 103 | download_file_from_S3_task >> branch_task 104 | branch_task >> branch_python 105 | branch_task >> branch_java -------------------------------------------------------------------------------- /sample_dags/collect_hourly_data_from_api.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from airflow import DAG 3 | from airflow.operators.python_operator import PythonOperator 4 | from airflow.operators.bash_operator import BashOperator 5 | from airflow.utils.dates import days_ago 6 | import requests 7 | import pandas as pd 8 | from datetime import datetime 9 | import json 10 | 11 | # This DAG is a skeleton template that collects json-formatted data once per hour 12 | # from two endpoints of an API and joins the data in a pandas DataFrame, which is finally 13 | # stored as a csv file. 14 | # 15 | # Data is first fetched from the API and saved into a file in /tmp/airflow, which in this 16 | # deployment configuration is the place for temporary data storage.
The final csv file 17 | # is also saved there, but you might want to create a separate pvc for data that is meant to be stored 18 | # for longer. 19 | # 20 | # NOTE: This DAG requires that the pip packages pandas and requests are installed (for an alternative, see the PythonVirtualenvOperator sketch after this file) 21 | 22 | API_URL_BASE = "https://api.com/" 23 | TMP_FOLDER = "/tmp/airflow/data" 24 | FILE_NAME_1 = "data1.json" 25 | FILE_NAME_2 = "data2.json" 26 | 27 | 28 | def download_data(endpoint, filename): 29 | """Downloads data from the given API endpoint to temporary storage""" 30 | response = requests.get(API_URL_BASE + endpoint, stream=True) 31 | 32 | if response.status_code == 200: 33 | # Data is temporarily saved to /tmp/airflow, which is temporary storage for workers 34 | with open(f"{TMP_FOLDER}/{filename}", "wb") as fd: 35 | for chunk in response.iter_content(): 36 | fd.write(chunk) 37 | else: 38 | raise Exception(f"Could not get the resource from endpoint {endpoint}. HTTP status code: {response.status_code}") 39 | 40 | 41 | def data_as_dict(path): 42 | """Opens the given json file and turns it into a dictionary object""" 43 | with open(path) as f: 44 | return json.loads(f.read()) 45 | 46 | 47 | def transform_data1(): 48 | """Processes the data from the first API endpoint""" 49 | d = data_as_dict(f"{TMP_FOLDER}/{FILE_NAME_1}") 50 | df = pd.json_normalize(d) 51 | 52 | # Do your pandas magic here 53 | 54 | return df 55 | 56 | 57 | def transform_data2(): 58 | """Processes the data from the second API endpoint""" 59 | d = data_as_dict(f"{TMP_FOLDER}/{FILE_NAME_2}") 60 | df = pd.json_normalize(d) 61 | 62 | # Do your pandas magic here 63 | 64 | return df 65 | 66 | 67 | def data_to_csv(file_path): 68 | """Merges the data from both endpoints and saves it to the given location as a csv file""" 69 | df = pd.merge(transform_data1(), transform_data2()) 70 | 71 | # The DataFrame is saved to the specified location, adding a timestamp to the file name 72 | df.to_csv(file_path + '-' + datetime.now().strftime('%Y-%m-%d-%H:%M')) 73 | 74 | 75 | default_args = { 76 | 'owner': 'airflow', 77 | 'start_date': days_ago(0, minute=1), 78 | 'retries': 4, 79 | 'retry_delay': timedelta(minutes=5), 80 | } 81 | 82 | # In schedule_interval, the time to execute the DAG is specified using the same syntax as cron jobs 83 | # It would also be possible to use, for example, timedelta(hours=1) 84 | with DAG(dag_id='get_data', default_args=default_args, schedule_interval='15 * * * *') as dag: 85 | 86 | t1 = BashOperator( 87 | task_id='make_tmp_folder', 88 | bash_command=f'mkdir -p {TMP_FOLDER}', 89 | ) 90 | 91 | t2 = PythonOperator( 92 | task_id='download_data1', 93 | python_callable=download_data, 94 | op_kwargs={'endpoint': 'api/data1', 'filename': FILE_NAME_1}, # You can pass arguments to the function as a dictionary 95 | ) 96 | 97 | t3 = PythonOperator( 98 | task_id='download_data2', 99 | python_callable=download_data, 100 | op_kwargs={'endpoint': 'api/data2', 'filename': FILE_NAME_2}, # Here we use the same function as in the previous task, but with different arguments 101 | ) 102 | 103 | t4 = PythonOperator( 104 | task_id='save_data', 105 | python_callable=data_to_csv, 106 | op_kwargs={'file_path': '/tmp/airflow'}, # You might want to create a new pvc to store the data, instead of using the volume meant as temporary storage for workers 107 | ) 108 | 109 | t5 = BashOperator( 110 | task_id='delete_tmp_folder', 111 | bash_command=f'rm -r {TMP_FOLDER}', 112 | ) 113 | 114 | # Set up the dependencies between the tasks 115 | t1 >> [t2, t3] >> t4 >> t5 116 | 
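The DAG above assumes that pandas and requests are already installed on the workers (via the image or the pip-requirements configmap). As the README below recommends, an alternative is to run such a task inside a PythonVirtualenvOperator, which installs its requirements into a throwaway virtual environment for the duration of the task. The following is a minimal sketch, not part of the repository, assuming Airflow 2.x, that the workers can reach PyPI, and that virtualenv is available on them; the URL, file paths and dag_id reuse or extend the placeholder names from the DAG above.

```python
"""Sketch (not in the repo): the download step as a PythonVirtualenvOperator task."""
from datetime import timedelta

from airflow import DAG
from airflow.operators.python import PythonVirtualenvOperator
from airflow.utils.dates import days_ago


def download_data_in_venv():
    # Imports must live inside the callable: its source is executed in a fresh
    # virtual environment that only contains the requirements listed below.
    import requests

    response = requests.get("https://api.com/api/data1", stream=True)
    response.raise_for_status()
    # Assumes /tmp/airflow/data already exists (the DAG above creates it with a BashOperator)
    with open("/tmp/airflow/data/data1.json", "wb") as fd:
        for chunk in response.iter_content(chunk_size=8192):
            fd.write(chunk)


with DAG(
    dag_id="virtualenv_example",
    default_args={"owner": "airflow", "start_date": days_ago(1)},
    schedule_interval=timedelta(hours=1),
) as dag:

    download_task = PythonVirtualenvOperator(
        task_id="download_data_in_venv",
        python_callable=download_data_in_venv,
        requirements=["requests"],   # installed into the temporary virtualenv
        system_site_packages=False,  # keep the virtualenv isolated from the worker image
    )
```

The virtual environment is created and torn down on every task run, so this trades some startup time for not having to bake the libraries into the worker image or the pip-requirements configmap.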
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # airflow-openshift 2 | The purpose of this template is to easily deploy [Apache Airflow](https://airflow.apache.org) on Openshift. 3 | 4 | ## Airflow container image 5 | The Airflow container image is built from the official image sources distributed by Airflow, with the additional provider packages for Apache Spark, Papermill and Mongo. It should also be possible to build your own image and deploy the template using that image, as long as the image is built using the official sources. 6 | 7 | ## Useful variables 8 | 9 | The template can be imported via the Openshift web UI or added via the command line interface (oc). 10 | 11 | The required variables for running the template are: 12 | 13 | - APPLICATION_NAME: A unique name identifying the airflow deployment 14 | - AUTHENTICATION_USERNAME: Username for the Airflow web UI authentication 15 | - AUTHENTICATION_PASSWORD: Password for the Airflow web UI authentication 16 | - JUPYTER_PASSWORD: For accessing the Jupyter web interface 17 | 18 | The rest of the variables are optional and have default values which can be changed if needed. 19 | 20 | Some of the variables that may be useful to you as the user are: 21 | 22 | - WORKER_COUNT: The number of workers to deploy (2 by default) 23 | - WORKER_CPU: CPU of each deployed worker 24 | - WORKER_MEMORY: Memory of each deployed worker 25 | - PIP_REQUIREMENTS: Python requirements for your DAGs 26 | 27 | ## How to upload DAGs to the Airflow webserver 28 | 29 | The current template deploys a Jupyter pod for writing the python code for DAGs. The password will be the one you set in the JUPYTER_PASSWORD variable. 30 | **Note** When accessing Jupyter, you can click on **Upload** to upload an existing Python file (.py extension) containing the Airflow DAG, or click on **New->Text File** and write the python code in the text file itself, *but remember to save it with the .py extension* 31 | 32 | It can take up to 5 minutes for the DAGs to show up in the web UI, so be patient! 33 | 34 | ## How to install custom Python libraries 35 | - The most reliable way to include your DAG dependencies in the image is to build your own Airflow image using the official sources, and include the dependencies in the build phase of the image. More information about building the image can be found [here](https://airflow.apache.org/docs/docker-stack/build.html). After building the image, push it to some image registry, and set the image link to the AIRFLOW_IMAGE variable to deploy the template using that image. 36 | 37 | - You might also want to consider using PythonVirtualenvOperator, which creates a virtual environment for a task with the required pip packages and tears it down after the task completes. For more information about the PythonVirtualenvOperator, see the official documentation [here](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/python.html?highlight=pythonvirtualenvoperator#pythonvirtualenvoperator) and [here](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/operators/python/index.html?highlight=pythonvirtualenvoperator#airflow.operators.python.PythonVirtualenvOperator). 38 | 39 | - There is also an easy and fast way to install any python libraries when deploying the template, but it is error-prone and only recommended for testing.
To use the easy method, you can: 40 | 41 | - Before Deployment: Use the variable **PIP_REQUIREMENTS**, where you can specify the names of the libraries separated by whitespace, for example `pandas scipy==1.5.4` 42 | 43 | - After Deployment: Edit the *configmap* **pip-requirements** and add your requirements there, similarly separated by whitespace. **NOTE** - when using this option, you need to redeploy the *scheduler* and *worker* deployments for the changes to take effect! 44 | 45 | However, this is a fragile way to install the dependencies, it is not recommended, and it may result in errors. 46 | 47 | ## Setting configuration variables 48 | If you want to change the Airflow configuration, the best way to do it is to add new environment variables in the deployment configs of the pods. Be aware that some variables have to be set in the worker pods, while others have to be set in the webserver pod for the change to take effect! For more information about configuring Airflow with environment variables, check the official documentation [here](https://airflow.apache.org/docs/stable/howto/set-config.html). For a list of all available Airflow configurations, see [here](https://airflow.apache.org/docs/stable/configurations-ref.html). 49 | 50 | ## Configuring email host 51 | Airflow can be configured to send emails: you can send custom emails through Airflow as a task, or receive alert emails yourself if one of your DAG runs has failed. For the email system to work, the following configuration variables have to be set in the deployment config of *worker*: 52 | * AIRFLOW__SMTP__SMTP_HOST 53 | * AIRFLOW__SMTP__SMTP_USER 54 | * AIRFLOW__SMTP__SMTP_PORT 55 | * AIRFLOW__SMTP__SMTP_MAIL_FROM 56 | 57 | And, if the SMTP host requires it: 58 | * AIRFLOW__SMTP__SMTP_PASSWORD 59 | 60 | If you need CSC-specific configuration, contact servicedesk@csc.fi. 61 | 62 | To use a Google Gmail account as the email host, you first have to create an App Password for your account. To set one up, follow Google's instructions for App Passwords. 63 | 64 | When you have the password, enter these as environment variables in the worker deployment config: 65 | * AIRFLOW__SMTP__SMTP_HOST = smtp.gmail.com 66 | * AIRFLOW__SMTP__SMTP_USER = \<your Gmail address\> 67 | * AIRFLOW__SMTP__SMTP_PASSWORD = \<your App Password\> 68 | * AIRFLOW__SMTP__SMTP_PORT = 587 69 | * AIRFLOW__SMTP__SMTP_MAIL_FROM = \<your Gmail address\> 70 | 71 | ## How to create a connection to a custom S3 object store 72 | 73 | Create a connection via the Airflow web UI by clicking on *Admin->Connections*, then fill in the following fields: 74 | 75 | * Conn Id: use a unique id for the connection. When interacting with S3, you need to pass this id in your DAG Python code 76 | * Conn Type: S3 77 | * Extra: `{"aws_access_key_id":"your-access-key-id", "aws_secret_access_key": "your-secret-access-key", "host": "the-s3-endpoint-url"}` -------------------------------------------------------------------------------- /AirflowTemplate.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Template 3 | labels: 4 | template: airflow 5 | message: |- 6 | To access the Airflow web UI, open the Route for the 'webserver' deployment. 7 | To upload/write the DAGs, open the Route for the 'jupyter' deployment. 8 | metadata: 9 | annotations: 10 | description: Deploys Apache Airflow on Openshift. 11 | 12 | 13 | Required variables - 'Application name', 'Airflow web UI username', 'Airflow web UI password' and 'Jupyter password'.
The rest of the variables are optional and are filled/generated with default/auto values if you do not explicitly provide a value. 14 | 15 | 16 | Dependencies - Before deployment you can list all required Python pip packages in the 'Python pip requirements' parameter, separated by whitespace. After deployment you can edit your Python pip requirements in the 'pip-requirements' config-map, similarly separated by whitespace, and redeploy the worker and scheduler pods. Note, however, that this is a fragile way to set up dependencies and it is not recommended. Consider using PythonVirtualenvOperator, which lets you load the needed libraries individually for the tasks that need them. If more complicated dependencies are needed, consider building the image yourself according to the Airflow documentation, and use that image as the basis for this deployment by setting the image link to the 'Airflow image link' parameter. 17 | 18 | 19 | Uploading/writing DAGs - To upload or write DAGs, open the route for the 'jupyter' deployment. Once uploaded or written through the Jupyter user interface, the DAGs should appear in the Airflow web UI within about 5 minutes. 20 | 21 | 22 | By default, the setup deploys the Airflow webserver backed by 2 Celery workers. 23 | The configuration for the worker pods can be changed according to the Openshift Quota (Limit Range). To get more quota, contact the Openshift admins. 24 | 25 | 26 | WARNING - This deployment setup is still in the experimental stage. 27 | iconClass: icon-datavirt 28 | openshift.io/display-name: Apache Airflow 29 | openshift.io/documentation-url: https://github.com/CSCfi/airflow-openshift 30 | openshift.io/support-url: https://www.csc.fi/contact-info 31 | openshift.io/long-description: Apache Airflow (or simply Airflow) is a platform to programmatically author, schedule, and monitor workflows. 32 | 33 | The workflows are defined as code, so that they become more maintainable, versionable, testable, and collaborative. 34 | 35 | Airflow is used to author workflows as directed acyclic graphs (DAGs) of tasks. The Airflow scheduler executes your tasks on an array of workers while following the specified dependencies. 36 | 37 | The rich user interface makes it easy to visualize pipelines running in production, monitor progress, and troubleshoot issues when needed. 38 | openshift.io/provider-display-name: CSC 39 | tags: python, data pipelines, orchestration platform 40 | template.openshift.io/bindable: "false" 41 | name: apache-airflow 42 | 43 | objects: 44 | 45 | - apiVersion: v1 46 | kind: DeploymentConfig 47 | metadata: 48 | labels: 49 | app: ${APPLICATION_NAME} 50 | name: flower 51 | spec: 52 | replicas: 1 53 | selector: 54 | app: ${APPLICATION_NAME} 55 | deploymentconfig: flower 56 | strategy: 57 | type: Rolling 58 | template: 59 | metadata: 60 | labels: 61 | app: ${APPLICATION_NAME} 62 | deploymentconfig: flower 63 | spec: 64 | initContainers: 65 | - image: docker-registry.rahti.csc.fi/da-images/busybox:1 66 | name: fernet-key-waiter 67 | command: ['sh', '-c', 'while [ !
-f /tmp/fernet_key/fernet_key.txt ]; do sleep 1; done' ] 68 | volumeMounts: 69 | - name: fernet-key-vol 70 | mountPath: /tmp/fernet_key 71 | containers: 72 | - env: 73 | - name: FLOWER_PORT 74 | value: '5555' 75 | - name: FLOWER_HOST 76 | value: flower 77 | - name : AIRFLOW__CORE__EXECUTOR 78 | value: CeleryExecutor 79 | - name: FLOWER_BASIC_AUTH 80 | valueFrom: 81 | secretKeyRef: 82 | key: flower_basic_auth 83 | name: flower-auth 84 | - name: AIRFLOW__CELERY__BROKER_URL 85 | valueFrom: 86 | secretKeyRef: 87 | key: broker-url 88 | name: redis 89 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 90 | valueFrom: 91 | secretKeyRef: 92 | key: connection-string 93 | name: postgresql 94 | - name: AIRFLOW__CELERY__RESULT_BACKEND 95 | valueFrom: 96 | secretKeyRef: 97 | key: result-backend 98 | name: postgresql 99 | - name: AIRFLOW__CORE__FERNET_KEY_CMD 100 | value: "cat /tmp/fernet_key/fernet_key.txt" 101 | image: ${AIRFLOW_IMAGE} 102 | args: 103 | - celery 104 | - flower 105 | imagePullPolicy: Always 106 | name: flower 107 | ports: 108 | - containerPort: 8080 109 | protocol: TCP 110 | - containerPort: 8793 111 | protocol: TCP 112 | - containerPort: 5555 113 | protocol: TCP 114 | resources: {} 115 | volumeMounts: 116 | - mountPath: /tmp/fernet_key 117 | name: fernet-key-vol 118 | volumes: 119 | - name: fernet-key-vol 120 | persistentVolumeClaim: 121 | claimName: fernet-key-pvc 122 | dnsPolicy: ClusterFirst 123 | restartPolicy: Always 124 | schedulerName: default-scheduler 125 | securityContext: {} 126 | terminationGracePeriodSeconds: 30 127 | triggers: 128 | - type: ConfigChange 129 | 130 | - apiVersion: v1 131 | kind: DeploymentConfig 132 | metadata: 133 | labels: 134 | app: ${APPLICATION_NAME} 135 | template: postgresql-ephemeral-template 136 | name: ${POSTGRESQL_HOST} 137 | spec: 138 | replicas: 1 139 | selector: 140 | name: ${POSTGRESQL_HOST} 141 | strategy: 142 | type: Recreate 143 | template: 144 | metadata: 145 | labels: 146 | name: ${POSTGRESQL_HOST} 147 | spec: 148 | containers: 149 | - env: 150 | - name: POSTGRESQL_USER 151 | valueFrom: 152 | secretKeyRef: 153 | key: database-user 154 | name: postgresql 155 | - name: POSTGRESQL_PASSWORD 156 | valueFrom: 157 | secretKeyRef: 158 | key: database-password 159 | name: postgresql 160 | - name: POSTGRESQL_DATABASE 161 | valueFrom: 162 | secretKeyRef: 163 | key: database-name 164 | name: postgresql 165 | image: centos/postgresql-12-centos7:1 166 | imagePullPolicy: IfNotPresent 167 | livenessProbe: 168 | failureThreshold: 3 169 | initialDelaySeconds: 30 170 | periodSeconds: 10 171 | successThreshold: 1 172 | tcpSocket: 173 | port: 5432 174 | timeoutSeconds: 1 175 | name: ${POSTGRESQL_HOST} 176 | ports: 177 | - containerPort: 5432 178 | protocol: TCP 179 | readinessProbe: 180 | exec: 181 | command: 182 | - /bin/sh 183 | - -i 184 | - -c 185 | - psql -h 127.0.0.1 -U $POSTGRESQL_USER -q -d $POSTGRESQL_DATABASE -c 186 | 'SELECT 1' 187 | failureThreshold: 3 188 | initialDelaySeconds: 20 189 | periodSeconds: 10 190 | successThreshold: 1 191 | timeoutSeconds: 1 192 | resources: 193 | limits: 194 | memory: 1Gi 195 | securityContext: 196 | capabilities: {} 197 | privileged: false 198 | volumeMounts: 199 | - mountPath: /var/lib/pgsql/data 200 | name: postgresql-data 201 | dnsPolicy: ClusterFirst 202 | restartPolicy: Always 203 | schedulerName: default-scheduler 204 | securityContext: {} 205 | terminationGracePeriodSeconds: 30 206 | volumes: 207 | - name: postgresql-data 208 | persistentVolumeClaim: 209 | claimName: ${PERSISTENT_VOLUME_CLAIM_DB} 210 | triggers: 211 
| - imageChangeParams: 212 | automatic: true 213 | containerNames: 214 | - ${POSTGRESQL_HOST} 215 | from: 216 | kind: ImageStreamTag 217 | name: postgresql:12 218 | namespace: openshift 219 | type: ImageChange 220 | - type: ConfigChange 221 | 222 | - apiVersion: v1 223 | kind: DeploymentConfig 224 | metadata: 225 | labels: 226 | app: ${APPLICATION_NAME} 227 | template: redis-ephemeral-template 228 | name: ${REDIS_HOST} 229 | spec: 230 | replicas: 1 231 | selector: 232 | name: ${REDIS_HOST} 233 | strategy: 234 | activeDeadlineSeconds: 21600 235 | recreateParams: 236 | timeoutSeconds: 600 237 | resources: {} 238 | type: Recreate 239 | template: 240 | metadata: 241 | labels: 242 | name: ${REDIS_HOST} 243 | spec: 244 | containers: 245 | - env: 246 | - name: REDIS_PASSWORD 247 | valueFrom: 248 | secretKeyRef: 249 | key: database-password 250 | name: redis 251 | image: registry.access.redhat.com/rhscl/redis-32-rhel7@sha256:50605070421172c6c41e03bcb4391f418240085f8e03f0f82190da75e51df9e3 252 | imagePullPolicy: IfNotPresent 253 | livenessProbe: 254 | failureThreshold: 3 255 | initialDelaySeconds: 30 256 | periodSeconds: 10 257 | successThreshold: 1 258 | tcpSocket: 259 | port: 6379 260 | timeoutSeconds: 1 261 | name: ${REDIS_HOST} 262 | ports: 263 | - containerPort: 6379 264 | protocol: TCP 265 | readinessProbe: 266 | exec: 267 | command: 268 | - /bin/sh 269 | - -i 270 | - -c 271 | - test "$(redis-cli -h 127.0.0.1 -a $REDIS_PASSWORD ping)" == "PONG" 272 | failureThreshold: 3 273 | initialDelaySeconds: 5 274 | periodSeconds: 10 275 | successThreshold: 1 276 | timeoutSeconds: 1 277 | resources: 278 | limits: 279 | memory: 1Gi 280 | securityContext: 281 | capabilities: {} 282 | privileged: false 283 | volumeMounts: 284 | - mountPath: /var/lib/redis/data 285 | name: redis-data 286 | dnsPolicy: ClusterFirst 287 | restartPolicy: Always 288 | schedulerName: default-scheduler 289 | securityContext: {} 290 | terminationGracePeriodSeconds: 30 291 | volumes: 292 | - emptyDir: {} 293 | name: redis-data 294 | test: false 295 | triggers: 296 | - imageChangeParams: 297 | automatic: true 298 | containerNames: 299 | - ${REDIS_HOST} 300 | from: 301 | kind: ImageStreamTag 302 | name: redis:3.2 303 | namespace: openshift 304 | type: ImageChange 305 | - type: ConfigChange 306 | 307 | - apiVersion: v1 308 | kind: DeploymentConfig 309 | metadata: 310 | labels: 311 | app: ${APPLICATION_NAME} 312 | name: webserver 313 | spec: 314 | replicas: 1 315 | revisionHistoryLimit: 10 316 | selector: 317 | app: ${APPLICATION_NAME} 318 | deploymentconfig: webserver 319 | strategy: 320 | activeDeadlineSeconds: 21600 321 | resources: {} 322 | rollingParams: 323 | intervalSeconds: 1 324 | maxSurge: 25% 325 | maxUnavailable: 25% 326 | timeoutSeconds: 600 327 | updatePeriodSeconds: 1 328 | type: Rolling 329 | template: 330 | metadata: 331 | labels: 332 | app: ${APPLICATION_NAME} 333 | deploymentconfig: webserver 334 | spec: 335 | initContainers: 336 | - image: docker-registry.rahti.csc.fi/airflow-image/fernet-key-generator:latest 337 | name: fernet-key-generator 338 | command: ["python3","create_fernet_key.py"] 339 | volumeMounts: 340 | - name: fernet-key-vol 341 | mountPath: /tmp/fernet_key 342 | containers: 343 | - name: webserver 344 | env: 345 | - name : AIRFLOW__CORE__EXECUTOR 346 | value: CeleryExecutor 347 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 348 | valueFrom: 349 | secretKeyRef: 350 | key: connection-string 351 | name: postgresql 352 | - name: _AIRFLOW_DB_UPGRADE 353 | value: "true" 354 | - name: _AIRFLOW_WWW_USER_CREATE 355 | 
value: "true" 356 | - name: _AIRFLOW_WWW_USER_USERNAME 357 | value: ${AUTHENTICATION_USERNAME} 358 | - name: _AIRFLOW_WWW_USER_PASSWORD 359 | value: ${AUTHENTICATION_PASSWORD} 360 | - name: AIRFLOW__CELERY__BROKER_URL 361 | valueFrom: 362 | secretKeyRef: 363 | key: broker-url 364 | name: redis 365 | - name: AIRFLOW__CELERY__RESULT_BACKEND 366 | valueFrom: 367 | secretKeyRef: 368 | key: result-backend 369 | name: postgresql 370 | - name: AIRFLOW_HOME 371 | value: "/opt/airflow" 372 | - name: HOME 373 | value: "/opt/airflow" 374 | - name: AIRFLOW__CORE__FERNET_KEY_CMD 375 | value: "cat /tmp/fernet_key/fernet_key.txt" 376 | image: ${AIRFLOW_IMAGE} 377 | args: 378 | - webserver 379 | imagePullPolicy: Always 380 | livenessProbe: 381 | httpGet: 382 | path: / 383 | port: 8080 384 | initialDelaySeconds: 120 385 | timeoutSeconds: 30 386 | ports: 387 | - containerPort: 5555 388 | protocol: TCP 389 | - containerPort: 8080 390 | protocol: TCP 391 | - containerPort: 8793 392 | protocol: TCP 393 | resources: 394 | limits: 395 | cpu: '1' 396 | memory: 2Gi 397 | requests: 398 | cpu: '1' 399 | memory: 2Gi 400 | volumeMounts: 401 | - mountPath: "/opt/airflow/dags" 402 | name: airpod-dag-vol 403 | - mountPath: "/opt/airflow/logs" 404 | name: airpod-log-vol 405 | - mountPath: /pip-requirements.txt 406 | name: pip-requirements-vol 407 | subPath: pip-requirements.txt 408 | - mountPath: /tmp/fernet_key 409 | name: fernet-key-vol 410 | dnsPolicy: ClusterFirst 411 | restartPolicy: Always 412 | schedulerName: default-scheduler 413 | securityContext: {} 414 | terminationGracePeriodSeconds: 30 415 | volumes: 416 | - name: airpod-dag-vol 417 | persistentVolumeClaim: 418 | claimName: ${PERSISTENT_VOLUME_CLAIM_DAG} 419 | - name: airpod-log-vol 420 | persistentVolumeClaim: 421 | claimName: ${PERSISTENT_VOLUME_CLAIM_LOG} 422 | - name: fernet-key-vol 423 | persistentVolumeClaim: 424 | claimName: fernet-key-pvc 425 | - configMap: 426 | defaultMode: 420 427 | items: 428 | - key: pip-requirements.txt 429 | path: pip-requirements.txt 430 | name: pip-requirements 431 | name: pip-requirements-vol 432 | test: false 433 | triggers: 434 | - type: ConfigChange 435 | 436 | - apiVersion: v1 437 | kind: DeploymentConfig 438 | metadata: 439 | labels: 440 | app: ${APPLICATION_NAME} 441 | name: scheduler 442 | spec: 443 | replicas: 1 444 | revisionHistoryLimit: 10 445 | selector: 446 | app: ${APPLICATION_NAME} 447 | deploymentconfig: scheduler 448 | strategy: 449 | activeDeadlineSeconds: 21600 450 | resources: {} 451 | rollingParams: 452 | intervalSeconds: 1 453 | maxSurge: 25% 454 | maxUnavailable: 25% 455 | timeoutSeconds: 600 456 | updatePeriodSeconds: 1 457 | type: Rolling 458 | template: 459 | metadata: 460 | labels: 461 | app: ${APPLICATION_NAME} 462 | deploymentconfig: scheduler 463 | spec: 464 | initContainers: 465 | - image: docker-registry.rahti.csc.fi/airflow-image/fernet-key-generator:latest 466 | name: fernet-key-generator 467 | command: ["python3","create_fernet_key.py"] 468 | volumeMounts: 469 | - name: fernet-key-vol 470 | mountPath: /tmp/fernet_key 471 | containers: 472 | - name: scheduler 473 | env: 474 | - name : AIRFLOW__CORE__EXECUTOR 475 | value: CeleryExecutor 476 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 477 | valueFrom: 478 | secretKeyRef: 479 | key: connection-string 480 | name: postgresql 481 | - name: AIRFLOW__CELERY__BROKER_URL 482 | valueFrom: 483 | secretKeyRef: 484 | key: broker-url 485 | name: redis 486 | - name: AIRFLOW__CELERY__RESULT_BACKEND 487 | valueFrom: 488 | secretKeyRef: 489 | key: result-backend 
490 | name: postgresql 491 | - name: AIRFLOW_HOME 492 | value: "/opt/airflow" 493 | - name: HOME 494 | value: "/opt/airflow" 495 | - name: _PIP_ADDITIONAL_REQUIREMENTS 496 | valueFrom: 497 | configMapKeyRef: 498 | name: pip-requirements 499 | key: pip-requirements.txt 500 | image: ${AIRFLOW_IMAGE} 501 | args: 502 | - scheduler 503 | imagePullPolicy: Always 504 | ports: 505 | - containerPort: 5555 506 | protocol: TCP 507 | - containerPort: 8080 508 | protocol: TCP 509 | - containerPort: 8793 510 | protocol: TCP 511 | resources: 512 | limits: 513 | cpu: '1' 514 | memory: 2Gi 515 | requests: 516 | cpu: '1' 517 | memory: 2Gi 518 | volumeMounts: 519 | - mountPath: "/opt/airflow/dags" 520 | name: airpod-dag-vol 521 | - mountPath: "/opt/airflow/logs" 522 | name: airpod-log-vol 523 | - mountPath: /pip-requirements.txt 524 | name: pip-requirements-vol 525 | subPath: pip-requirements.txt 526 | - mountPath: /tmp/fernet_key 527 | name: fernet-key-vol 528 | dnsPolicy: ClusterFirst 529 | restartPolicy: Always 530 | schedulerName: default-scheduler 531 | securityContext: {} 532 | terminationGracePeriodSeconds: 30 533 | volumes: 534 | - name: airpod-dag-vol 535 | persistentVolumeClaim: 536 | claimName: ${PERSISTENT_VOLUME_CLAIM_DAG} 537 | - name: airpod-log-vol 538 | persistentVolumeClaim: 539 | claimName: ${PERSISTENT_VOLUME_CLAIM_LOG} 540 | - name: fernet-key-vol 541 | persistentVolumeClaim: 542 | claimName: fernet-key-pvc 543 | - configMap: 544 | defaultMode: 420 545 | items: 546 | - key: pip-requirements.txt 547 | path: pip-requirements.txt 548 | name: pip-requirements 549 | name: pip-requirements-vol 550 | test: false 551 | triggers: 552 | - type: ConfigChange 553 | 554 | - apiVersion: v1 555 | kind: DeploymentConfig 556 | metadata: 557 | labels: 558 | app: ${APPLICATION_NAME} 559 | name: worker 560 | spec: 561 | replicas: ${WORKER_COUNT} 562 | revisionHistoryLimit: 10 563 | selector: 564 | app: ${APPLICATION_NAME} 565 | deploymentconfig: worker 566 | strategy: 567 | type: Rolling 568 | template: 569 | metadata: 570 | labels: 571 | app: ${APPLICATION_NAME} 572 | deploymentconfig: worker 573 | spec: 574 | initContainers: 575 | - image: docker-registry.rahti.csc.fi/da-images/busybox:1 576 | name: fernet-key-waiter 577 | command: ['sh', '-c', 'while [ ! 
-f /tmp/fernet_key/fernet_key.txt ]; do sleep 1; done' ] 578 | volumeMounts: 579 | - name: fernet-key-vol 580 | mountPath: /tmp/fernet_key 581 | containers: 582 | - env: 583 | - name : AIRFLOW__CORE__EXECUTOR 584 | value: CeleryExecutor 585 | - name: AIRFLOW__CELERY__BROKER_URL 586 | valueFrom: 587 | secretKeyRef: 588 | key: broker-url 589 | name: redis 590 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 591 | valueFrom: 592 | secretKeyRef: 593 | key: connection-string 594 | name: postgresql 595 | - name: C_FORCE_ROOT 596 | value: 'true' 597 | - name: AIRFLOW_HOME 598 | value: "/opt/airflow" 599 | - name: HOME 600 | value: "/opt/airflow" 601 | - name: _PIP_ADDITIONAL_REQUIREMENTS 602 | valueFrom: 603 | configMapKeyRef: 604 | name: pip-requirements 605 | key: pip-requirements.txt 606 | - name: AIRFLOW__CORE__FERNET_KEY_CMD 607 | value: "cat /tmp/fernet_key/fernet_key.txt" 608 | - name: AIRFLOW__CELERY__RESULT_BACKEND 609 | valueFrom: 610 | secretKeyRef: 611 | key: result-backend 612 | name: postgresql 613 | image: ${AIRFLOW_IMAGE} 614 | args: 615 | - celery 616 | - worker 617 | imagePullPolicy: Always 618 | name: worker 619 | ports: 620 | - containerPort: 5555 621 | protocol: TCP 622 | - containerPort: 8080 623 | protocol: TCP 624 | - containerPort: 8793 625 | protocol: TCP 626 | resources: 627 | requests: 628 | cpu: ${WORKER_CPU} 629 | memory: ${WORKER_MEMORY} 630 | volumeMounts: 631 | - mountPath: "/opt/airflow/dags" 632 | name: airpod-dag-vol 633 | - mountPath: "/opt/airflow/logs" 634 | name: airpod-log-vol 635 | - mountPath: /pip-requirements.txt 636 | name: pip-requirements-vol 637 | subPath: pip-requirements.txt 638 | - mountPath: '/tmp/airflow' 639 | name: airpod-tmp-worker-vol 640 | - mountPath: /tmp/fernet_key 641 | name: fernet-key-vol 642 | dnsPolicy: ClusterFirst 643 | restartPolicy: Always 644 | schedulerName: default-scheduler 645 | securityContext: {} 646 | terminationGracePeriodSeconds: 30 647 | volumes: 648 | - name: airpod-dag-vol 649 | persistentVolumeClaim: 650 | claimName: ${PERSISTENT_VOLUME_CLAIM_DAG} 651 | - name: airpod-log-vol 652 | persistentVolumeClaim: 653 | claimName: ${PERSISTENT_VOLUME_CLAIM_LOG} 654 | - name: fernet-key-vol 655 | persistentVolumeClaim: 656 | claimName: fernet-key-pvc 657 | - configMap: 658 | defaultMode: 420 659 | items: 660 | - key: pip-requirements.txt 661 | path: pip-requirements.txt 662 | name: pip-requirements 663 | name: pip-requirements-vol 664 | - name: airpod-tmp-worker-vol 665 | persistentVolumeClaim: 666 | claimName: ${PERSISTENT_VOLUME_CLAIM_TMP_WORKER} 667 | test: false 668 | triggers: 669 | - type: ConfigChange 670 | 671 | - apiVersion: v1 672 | kind: DeploymentConfig 673 | metadata: 674 | labels: 675 | app: ${APPLICATION_NAME} 676 | name: jupyter 677 | spec: 678 | replicas: 1 679 | selector: 680 | app: ${APPLICATION_NAME} 681 | deploymentconfig: jupyter 682 | strategy: 683 | type: Rolling 684 | template: 685 | metadata: 686 | labels: 687 | app: ${APPLICATION_NAME} 688 | deploymentconfig: jupyter 689 | spec: 690 | containers: 691 | - env: 692 | - name: JUPYTER_NOTEBOOK_PASSWORD 693 | value: ${JUPYTER_PASSWORD} 694 | image: >- 695 | quay.io/jupyteronopenshift/s2i-minimal-notebook-py35@sha256:e6f032b57a483b98059eb3e9f4081b67e7224d22e06bddf6700d88ceab7478c3 696 | imagePullPolicy: Always 697 | livenessProbe: 698 | httpGet: 699 | path: / 700 | port: 8080 701 | initialDelaySeconds: 30 702 | timeoutSeconds: 30 703 | name: s2i-minimal-notebook-py3 704 | ports: 705 | - containerPort: 8080 706 | protocol: TCP 707 | resources: {} 708 | 
volumeMounts: 709 | - mountPath: /opt/app-root/src 710 | name: airpod-dag-vol 711 | volumes: 712 | - name: airpod-dag-vol 713 | persistentVolumeClaim: 714 | claimName: ${PERSISTENT_VOLUME_CLAIM_DAG} 715 | triggers: 716 | - type: ConfigChange 717 | 718 | - apiVersion: v1 719 | data: 720 | pip-requirements.txt: ${PIP_REQUIREMENTS} 721 | kind: ConfigMap 722 | metadata: 723 | name: pip-requirements 724 | 725 | - apiVersion: v1 726 | stringData: 727 | flower_basic_auth: ${FLOWER_USER}:${FLOWER_PASSWORD} 728 | flower_username: ${FLOWER_USER} 729 | flower_password: ${FLOWER_PASSWORD} 730 | kind: Secret 731 | metadata: 732 | name: flower-auth 733 | type: Opaque 734 | 735 | - apiVersion: v1 736 | stringData: 737 | database-name: ${POSTGRESQL_DATABASE} 738 | database-password: ${POSTGRESQL_PASSWORD} 739 | database-user: ${POSTGRESQL_USER} 740 | connection-string: postgresql+psycopg2://${POSTGRESQL_USER}:${POSTGRESQL_PASSWORD}@${POSTGRESQL_HOST}:5432/${POSTGRESQL_DATABASE} 741 | result-backend: db+postgresql://${POSTGRESQL_USER}:${POSTGRESQL_PASSWORD}@${POSTGRESQL_HOST}:5432/${POSTGRESQL_DATABASE} 742 | kind: Secret 743 | metadata: 744 | labels: 745 | app: ${APPLICATION_NAME} 746 | template: postgresql-ephemeral-template 747 | name: postgresql 748 | type: Opaque 749 | 750 | - apiVersion: v1 751 | stringData: 752 | database-password: ${REDIS_PASSWORD} 753 | broker-url: redis://:${REDIS_PASSWORD}@${REDIS_HOST}:6379/1 754 | kind: Secret 755 | metadata: 756 | labels: 757 | app: ${APPLICATION_NAME} 758 | template: redis-ephemeral-template 759 | name: redis 760 | type: Opaque 761 | 762 | - apiVersion: v1 763 | kind: Service 764 | metadata: 765 | labels: 766 | app: ${APPLICATION_NAME} 767 | name: flower 768 | spec: 769 | ports: 770 | - name: 5555-tcp 771 | port: 5555 772 | protocol: TCP 773 | targetPort: 5555 774 | - name: 8080-tcp 775 | port: 8080 776 | protocol: TCP 777 | targetPort: 8080 778 | - name: 8793-tcp 779 | port: 8793 780 | protocol: TCP 781 | targetPort: 8793 782 | selector: 783 | app: ${APPLICATION_NAME} 784 | deploymentconfig: flower 785 | sessionAffinity: None 786 | type: ClusterIP 787 | status: 788 | loadBalancer: {} 789 | 790 | - apiVersion: v1 791 | kind: Service 792 | metadata: 793 | labels: 794 | app: ${APPLICATION_NAME} 795 | template: postgresql-ephemeral-template 796 | name: ${POSTGRESQL_HOST} 797 | spec: 798 | ports: 799 | - name: ${POSTGRESQL_HOST} 800 | port: 5432 801 | protocol: TCP 802 | targetPort: 5432 803 | selector: 804 | name: ${POSTGRESQL_HOST} 805 | sessionAffinity: None 806 | type: ClusterIP 807 | status: 808 | loadBalancer: {} 809 | 810 | - apiVersion: v1 811 | kind: Service 812 | metadata: 813 | labels: 814 | app: ${APPLICATION_NAME} 815 | template: redis-ephemeral-template 816 | name: ${REDIS_HOST} 817 | spec: 818 | ports: 819 | - name: ${REDIS_HOST} 820 | port: 6379 821 | protocol: TCP 822 | targetPort: 6379 823 | selector: 824 | name: ${REDIS_HOST} 825 | sessionAffinity: None 826 | type: ClusterIP 827 | status: 828 | loadBalancer: {} 829 | 830 | - apiVersion: v1 831 | kind: Service 832 | metadata: 833 | labels: 834 | app: ${APPLICATION_NAME} 835 | name: scheduler 836 | spec: 837 | ports: 838 | - name: 5555-tcp 839 | port: 5555 840 | protocol: TCP 841 | targetPort: 5555 842 | - name: 8080-tcp 843 | port: 8080 844 | protocol: TCP 845 | targetPort: 8080 846 | - name: 8793-tcp 847 | port: 8793 848 | protocol: TCP 849 | targetPort: 8793 850 | selector: 851 | app: ${APPLICATION_NAME} 852 | deploymentconfig: scheduler 853 | sessionAffinity: None 854 | type: ClusterIP 855 | 
status: 856 | loadBalancer: {} 857 | 858 | - apiVersion: v1 859 | kind: Service 860 | metadata: 861 | labels: 862 | app: ${APPLICATION_NAME} 863 | name: webserver 864 | spec: 865 | ports: 866 | - name: 5555-tcp 867 | port: 5555 868 | protocol: TCP 869 | targetPort: 5555 870 | - name: 8080-tcp 871 | port: 8080 872 | protocol: TCP 873 | targetPort: 8080 874 | - name: 8793-tcp 875 | port: 8793 876 | protocol: TCP 877 | targetPort: 8793 878 | selector: 879 | app: ${APPLICATION_NAME} 880 | deploymentconfig: webserver 881 | sessionAffinity: None 882 | type: ClusterIP 883 | status: 884 | loadBalancer: {} 885 | 886 | - apiVersion: v1 887 | kind: Service 888 | metadata: 889 | labels: 890 | app: ${APPLICATION_NAME} 891 | name: worker 892 | spec: 893 | ports: 894 | - name: 5555-tcp 895 | port: 5555 896 | protocol: TCP 897 | targetPort: 5555 898 | - name: 8080-tcp 899 | port: 8080 900 | protocol: TCP 901 | targetPort: 8080 902 | - name: 8793-tcp 903 | port: 8793 904 | protocol: TCP 905 | targetPort: 8793 906 | selector: 907 | app: ${APPLICATION_NAME} 908 | deploymentconfig: worker 909 | sessionAffinity: None 910 | type: ClusterIP 911 | status: 912 | loadBalancer: {} 913 | 914 | - apiVersion: v1 915 | kind: Service 916 | metadata: 917 | labels: 918 | app: ${APPLICATION_NAME} 919 | name: jupyter 920 | spec: 921 | ports: 922 | - name: 8080-tcp 923 | port: 8080 924 | protocol: TCP 925 | targetPort: 8080 926 | selector: 927 | app: ${APPLICATION_NAME} 928 | deploymentconfig: jupyter 929 | status: 930 | loadBalancer: {} 931 | 932 | - apiVersion: v1 933 | kind: Route 934 | metadata: 935 | labels: 936 | app: ${APPLICATION_NAME} 937 | name: ${APPLICATION_NAME}-flower 938 | spec: 939 | port: 940 | targetPort: 5555-tcp 941 | tls: 942 | insecureEdgeTerminationPolicy: Redirect 943 | termination: edge 944 | to: 945 | kind: Service 946 | name: flower 947 | weight: 100 948 | wildcardPolicy: None 949 | 950 | - apiVersion: v1 951 | kind: Route 952 | metadata: 953 | labels: 954 | app: ${APPLICATION_NAME} 955 | name: ${APPLICATION_NAME} 956 | spec: 957 | port: 958 | targetPort: 8080-tcp 959 | tls: 960 | insecureEdgeTerminationPolicy: Redirect 961 | termination: edge 962 | to: 963 | kind: Service 964 | name: webserver 965 | weight: 100 966 | wildcardPolicy: None 967 | 968 | - apiVersion: v1 969 | kind: Route 970 | metadata: 971 | labels: 972 | app: ${APPLICATION_NAME} 973 | name: ${APPLICATION_NAME}-jupyter 974 | spec: 975 | port: 976 | targetPort: 8080-tcp 977 | tls: 978 | insecureEdgeTerminationPolicy: Redirect 979 | termination: edge 980 | to: 981 | kind: Service 982 | name: jupyter 983 | weight: 100 984 | wildcardPolicy: None 985 | 986 | - apiVersion: "v1" 987 | kind: "PersistentVolumeClaim" 988 | metadata: 989 | name: ${PERSISTENT_VOLUME_CLAIM_DAG} 990 | spec: 991 | accessModes: 992 | - "ReadWriteOnce" 993 | resources: 994 | requests: 995 | storage: ${PERSISTENT_VOLUME_CLAIM_DAG_SIZE} 996 | 997 | - apiVersion: "v1" 998 | kind: "PersistentVolumeClaim" 999 | metadata: 1000 | name: ${PERSISTENT_VOLUME_CLAIM_LOG} 1001 | spec: 1002 | accessModes: 1003 | - "ReadWriteOnce" 1004 | resources: 1005 | requests: 1006 | storage: ${PERSISTENT_VOLUME_CLAIM_LOG_SIZE} 1007 | 1008 | - apiVersion: "v1" 1009 | kind: "PersistentVolumeClaim" 1010 | metadata: 1011 | name: ${PERSISTENT_VOLUME_CLAIM_DB} 1012 | spec: 1013 | accessModes: 1014 | - "ReadWriteOnce" 1015 | resources: 1016 | requests: 1017 | storage: ${PERSISTENT_VOLUME_CLAIM_DB_SIZE} 1018 | 1019 | - apiVersion: "v1" 1020 | kind: "PersistentVolumeClaim" 1021 | metadata: 1022 | name: 
${PERSISTENT_VOLUME_CLAIM_TMP_WORKER} 1023 | spec: 1024 | accessModes: 1025 | - "ReadWriteOnce" 1026 | resources: 1027 | requests: 1028 | storage: ${PERSISTENT_VOLUME_CLAIM_TMP_WORKER_SIZE} 1029 | 1030 | - apiVersion: "v1" 1031 | kind: "PersistentVolumeClaim" 1032 | metadata: 1033 | name: "fernet-key-pvc" 1034 | spec: 1035 | accessModes: 1036 | - "ReadWriteMany" 1037 | resources: 1038 | requests: 1039 | storage: 1Mi 1040 | 1041 | parameters: 1042 | - description: Name of the Airflow application 1043 | displayName: Application name 1044 | name: APPLICATION_NAME 1045 | required: true 1046 | - description: Username for the Airflow web UI authentication 1047 | displayName: Airflow web UI username 1048 | name: AUTHENTICATION_USERNAME 1049 | required: true 1050 | - description: Password for the Airflow web UI authentication 1051 | displayName: Airflow web UI password 1052 | name: AUTHENTICATION_PASSWORD 1053 | required: true 1054 | - description: Password for accessing the Jupyter web interface used for writing/uploading DAGs 1055 | displayName: Jupyter password 1056 | name: JUPYTER_PASSWORD 1057 | required: true 1058 | - description: Number of Celery workers 1059 | displayName: Number of workers 1060 | name: WORKER_COUNT 1061 | value: "2" 1062 | - description: Celery worker CPU (check with your project limits) 1063 | displayName: Worker CPU 1064 | name: WORKER_CPU 1065 | value: "2" 1066 | - description: Celery worker memory size (check with your project limits) 1067 | displayName: Worker memory 1068 | name: WORKER_MEMORY 1069 | value: "2Gi" 1070 | - description: Python pip requirements needed for the DAGs, separated by whitespace. NOTE! This feature is fragile, and does not work for example for Airflow providers packages. For best results build your own Airflow image with the dependencies baked in. Consider also using PythonVirtualenvOperator. 
1071 | displayName: Python pip requirements 1072 | name: PIP_REQUIREMENTS 1073 | value: "pandas scipy==1.5.1" 1074 | - description: Username for accessing the Flower web UI for Celery workers 1075 | displayName: Flower username 1076 | from: '[A-Z0-9]{12}' 1077 | generate: expression 1078 | name: FLOWER_USER 1079 | - description: Password for accessing the Flower web UI for Celery workers 1080 | displayName: Flower password 1081 | from: '[A-Z0-9]{12}' 1082 | generate: expression 1083 | name: FLOWER_PASSWORD 1084 | - description: PostgreSQL (Airflow metadata DB) host 1085 | displayName: PostgreSQL hostname 1086 | name: POSTGRESQL_HOST 1087 | value: postgresql 1088 | required: true 1089 | - description: Username for PostgreSQL user that will be used for accessing the database 1090 | displayName: PostgreSQL connection username 1091 | from: 'user[A-Z0-9]{5}' 1092 | generate: expression 1093 | name: POSTGRESQL_USER 1094 | required: true 1095 | - description: Password for the PostgreSQL connection user 1096 | displayName: PostgreSQL connection password 1097 | from: '[a-zA-Z0-9]{16}' 1098 | generate: expression 1099 | name: POSTGRESQL_PASSWORD 1100 | required: true 1101 | - description: Database name for PostgreSQL database 1102 | displayName: PostgreSQL connection database 1103 | from: 'airflow[A-Z0-9]{5}' 1104 | generate: expression 1105 | name: POSTGRESQL_DATABASE 1106 | required: true 1107 | - description: Redis hostname (to avoid issues with default naming in OpenShift) 1108 | displayName: Redis hostname 1109 | name: REDIS_HOST 1110 | value: redis 1111 | required: true 1112 | - description: Password for Redis database 1113 | displayName: Redis connection password 1114 | from: '[A-Z0-9]{15}' 1115 | generate: expression 1116 | name: REDIS_PASSWORD 1117 | required: true 1118 | - description: Airflow image link 1119 | displayName: Airflow image link 1120 | name: AIRFLOW_IMAGE 1121 | value: docker-registry.rahti.csc.fi/airflow-image/airflow-2-os:latest 1122 | required: true 1123 | - description: Attached PERSISTENT volume claim name for storing the DAGs 1124 | displayName: PERSISTENT volume claim name (DAGs) 1125 | name: PERSISTENT_VOLUME_CLAIM_DAG 1126 | value: air-dags-pvc 1127 | - description: Size of the pvc volume storing DAGs 1128 | displayName: DAG volume storage size 1129 | name: PERSISTENT_VOLUME_CLAIM_DAG_SIZE 1130 | value: "1Gi" 1131 | - description: Attached PERSISTENT volume claim name for storing the logs 1132 | displayName: PERSISTENT volume claim name (logs) 1133 | name: PERSISTENT_VOLUME_CLAIM_LOG 1134 | value: air-logs-pvc 1135 | - description: Size of the pvc volume storing logs 1136 | displayName: Logs volume storage size 1137 | name: PERSISTENT_VOLUME_CLAIM_LOG_SIZE 1138 | value: "10Gi" 1139 | - description: Attached PERSISTENT volume claim name for storing metadata in PostgreSQL database 1140 | displayName: PERSISTENT volume claim name (database) 1141 | name: PERSISTENT_VOLUME_CLAIM_DB 1142 | value: air-db-pvc 1143 | - description: Size of the metadata volume storage 1144 | displayName: Metadata volume storage size 1145 | name: PERSISTENT_VOLUME_CLAIM_DB_SIZE 1146 | value: "1Gi" 1147 | - description: Attached PERSISTENT volume claim name for storing temporary data across Celery workers 1148 | displayName: PERSISTENT volume claim name (temporary storage for workers) 1149 | name: PERSISTENT_VOLUME_CLAIM_TMP_WORKER 1150 | value: air-tmp-worker-pvc 1151 | - description: Size of the temporary data storage in Celery workers 1152 | displayName: Temporary data volume storage size 
1153 | name: PERSISTENT_VOLUME_CLAIM_TMP_WORKER_SIZE 1154 | value: "2Gi" --------------------------------------------------------------------------------