├── .gitignore ├── scripts │   ├── kube │   │   ├── templates │   │   │   ├── affinity.yml │   │   │   ├── secrets.template.yaml │   │   │   ├── volumes.template.yaml │   │   │   ├── airflow.template.yaml │   │   │   └── configmaps.template.yaml │   │   ├── namespace.yaml │   │   ├── secrets.yaml │   │   ├── volumes.yaml │   │   ├── build │   │   │   ├── airflow.yaml │   │   │   └── configmaps.yaml │   │   └── deploy.sh │   ├── cleanup_infra.sh │   ├── docker │   │   ├── bootstrap.sh │   │   ├── airflow-test-env-init.sh │   │   └── Dockerfile │   ├── reset_env_vars.sh │   └── setup_infra.sh └── Readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store -------------------------------------------------------------------------------- /scripts/kube/templates/affinity.yml: -------------------------------------------------------------------------------- 1 | 2 | affinity = { 3 | 'nodeAffinity': { 4 | 'requiredDuringSchedulingIgnoredDuringExecution': [ 5 | { 6 | 'weight': 1, 7 | 'preference': { 8 | 'matchExpressions': [ 9 | { 10 | 'key': 'lifecycle', 11 | 'operator': 'In', 12 | 'values': ['Ec2Spot'] 13 | } 14 | ] 15 | } 16 | } 17 | ] 18 | } 19 | } 20 | 21 | tolerations = [{ 22 | 'key': 'spotInstance', 23 | 'operator': 'Equal', 24 | 'value': 'true', 25 | 'effect': 'PreferNoSchedule' 26 | }] -------------------------------------------------------------------------------- /scripts/kube/namespace.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. 
* 17 | --- 18 | apiVersion: v1 19 | kind: Namespace 20 | metadata: 21 | name: airflow 22 | 23 | -------------------------------------------------------------------------------- /scripts/cleanup_infra.sh: -------------------------------------------------------------------------------- 1 | kubectl delete ns airflow 2 | helm delete cluster-autoscaler --namespace kube-system 3 | helm delete aws-efs-csi-driver --namespace kube-system 4 | 5 | aws efs delete-access-point --access-point-id $(aws efs describe-access-points --file-system-id $AOK_EFS_FS_ID --region $AOK_AWS_REGION --query 'AccessPoints[0].AccessPointId' --output text) --region $AOK_AWS_REGION 6 | for mount_target in $(aws efs describe-mount-targets --file-system-id $AOK_EFS_FS_ID --region $AOK_AWS_REGION --query 'MountTargets[].MountTargetId' --output text); do aws efs delete-mount-target --mount-target-id $mount_target --region $AOK_AWS_REGION; done 7 | sleep 15 8 | aws efs delete-file-system --file-system-id $AOK_EFS_FS_ID --region $AOK_AWS_REGION 9 | aws ec2 delete-security-group --group-id $AOK_EFS_SG_ID --region $AOK_AWS_REGION 10 | aws rds delete-db-instance --db-instance-identifier airflow-postgres --delete-automated-backups --skip-final-snapshot --region $AOK_AWS_REGION 11 | sleep 180 12 | aws rds delete-db-subnet-group --db-subnet-group-name airflow-postgres-subnet --region $AOK_AWS_REGION 13 | aws ecr delete-repository --repository-name airflow-eks-demo --force --region $AOK_AWS_REGION 14 | eksctl delete cluster --name=$AOK_EKS_CLUSTER_NAME 15 | -------------------------------------------------------------------------------- /scripts/docker/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one * 4 | # or more contributor license agreements. See the NOTICE file * 5 | # distributed with this work for additional information * 6 | # regarding copyright ownership. The ASF licenses this file * 7 | # to you under the Apache License, Version 2.0 (the * 8 | # "License"); you may not use this file except in compliance * 9 | # with the License. You may obtain a copy of the License at * 10 | # * 11 | # http://www.apache.org/licenses/LICENSE-2.0 * 12 | # * 13 | # Unless required by applicable law or agreed to in writing, * 14 | # software distributed under the License is distributed on an * 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 16 | # KIND, either express or implied. See the License for the * 17 | # specific language governing permissions and limitations * 18 | # under the License. * 19 | 20 | # launch the appropriate process 21 | 22 | if [ "$1" = "webserver" ] 23 | then 24 | exec airflow webserver 25 | fi 26 | 27 | if [ "$1" = "scheduler" ] 28 | then 29 | exec airflow scheduler 30 | fi 31 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Airflow Kubernetes Setup 2 | 3 | The setup files are copied from the Apache Airflow repository and modified to fit the requirements of this setup. 4 | 5 | 6 | # Prerequisites 7 | 8 | 1. An EKS cluster. 9 | 2. A Spot managed node group on the EKS cluster with the following setup: \ 10 | a. Label ```lifecycle: Ec2Spot```. \ 11 | b. Taint ```spotInstance: true:PreferNoSchedule```. \ 12 | c. InstancesDistribution as ```spotAllocationStrategy: capacity-optimized```. \ 13 | d. Note: Without Spot nodes, jobs will run on On-Demand nodes. 14 | 3. 
An ECR repository to push the Airflow Docker image to. 15 | # Steps 16 | 17 | 1. Navigate to the ```scripts/docker``` directory and build the Docker image using ```docker build -t <ECR repository URI> .``` 18 | 2. Push the image to the ECR repository using ```docker push <ECR repository URI>``` 19 | 3. Set the following environment variable on your terminal: \ 20 | a. ```export AOK_AIRFLOW_REPOSITORY=<ECR repository URI>```. \ 21 | 4. Navigate to the ```scripts/kube``` directory and run ```./deploy.sh``` to deploy the Kubernetes infrastructure for Airflow. 22 | 5. Obtain the Airflow URL by running ```kubectl get svc -n airflow``` 23 | 6. Log in to Airflow at the above URL with ```eksuser``` as the user and ```ekspassword``` as the password. 24 | 7. On your terminal, run ```kubectl get nodes --label-columns=lifecycle --selector=lifecycle=Ec2Spot``` to get the list of EC2 Spot nodes. 25 | 8. On your terminal, run ```kubectl get pods -n airflow -w -o wide``` to watch the pods. 26 | 9. Trigger one of the DAGs in the Airflow console to see the worker pods created for the job. 27 | 10. On your terminal, verify that the worker pods are scheduled on the Spot nodes with the label ```lifecycle: Ec2Spot``` listed in step 7. -------------------------------------------------------------------------------- /scripts/docker/airflow-test-env-init.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one * 4 | # or more contributor license agreements. See the NOTICE file * 5 | # distributed with this work for additional information * 6 | # regarding copyright ownership. The ASF licenses this file * 7 | # to you under the Apache License, Version 2.0 (the * 8 | # "License"); you may not use this file except in compliance * 9 | # with the License. You may obtain a copy of the License at * 10 | # * 11 | # http://www.apache.org/licenses/LICENSE-2.0 * 12 | # * 13 | # Unless required by applicable law or agreed to in writing, * 14 | # software distributed under the License is distributed on an * 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 16 | # KIND, either express or implied. See the License for the * 17 | # specific language governing permissions and limitations * 18 | # under the License. 19 | 20 | set -e 21 | 22 | cd /usr/local/lib/python3.6/site-packages/airflow && \ 23 | cp example_dags/example_bash_operator.py /root/airflow/dags/ && \ 24 | # cp -R example_dags/* /root/airflow/dags/ && \ 25 | # cp -R contrib/example_dags/example_kubernetes_*.py /root/airflow/dags/ && \ 26 | cp -R contrib/example_dags/libs /root/airflow/dags/ && \ 27 | airflow initdb && \ 28 | alembic upgrade heads && \ 29 | (airflow create_user --username eksuser --lastname eks --firstname user --email eksuser@amazon.com --role Admin --password ekspassword || true) 30 | -------------------------------------------------------------------------------- /scripts/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. 
You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | 18 | FROM python:3.6-slim 19 | 20 | # install deps 21 | RUN apt-get update -y && apt-get install -y \ 22 | libczmq-dev \ 23 | libssl-dev \ 24 | inetutils-telnet \ 25 | bind9utils \ 26 | gcc \ 27 | && apt-get clean 28 | 29 | RUN pip install --upgrade pip 30 | 31 | RUN pip install apache-airflow==1.10.10 32 | RUN pip install SQLAlchemy==1.3.15 33 | RUN pip install 'apache-airflow[kubernetes]' 34 | RUN pip install 'apache-airflow[postgres]' 35 | 36 | 37 | COPY airflow-test-env-init.sh /tmp/airflow-test-env-init.sh 38 | 39 | COPY bootstrap.sh /bootstrap.sh 40 | RUN chmod +x /bootstrap.sh 41 | ENTRYPOINT ["/bootstrap.sh"] 42 | -------------------------------------------------------------------------------- /scripts/kube/templates/secrets.template.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | --- 18 | apiVersion: v1 19 | kind: Secret 20 | metadata: 21 | name: airflow-secrets 22 | namespace: airflow 23 | type: Opaque 24 | data: 25 | # The sql_alchemy_conn value is a base64 encoded representation of this connection string: 26 | # New postgresql://eksworkshop:${RDS_PASSWORD}@${RDS_ENDPOINT}:5432/eksworkshop 27 | # sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93OjU0MzIvYWlyZmxvdwo= 28 | # old postgresql+psycopg2://root:root@postgres-airflow:5432/airflow 29 | sql_alchemy_conn: {{AOK_SQL_ALCHEMY_CONN}} 30 | # sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93OjU0MzIvYWlyZmxvdwo= 31 | -------------------------------------------------------------------------------- /scripts/kube/secrets.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. 
You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | --- 18 | apiVersion: v1 19 | kind: Secret 20 | metadata: 21 | name: airflow-secrets 22 | namespace: airflow 23 | type: Opaque 24 | data: 25 | # The sql_alchemy_conn value is a base64 encoded representation of this connection string: 26 | # New postgresql://eksworkshop:${RDS_PASSWORD}@${RDS_ENDPOINT}:5432/eksworkshop 27 | # sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93OjU0MzIvYWlyZmxvdwo= 28 | # old postgresql+psycopg2://root:root@postgres-airflow:5432/airflow 29 | sql_alchemy_conn: cG9zdGdyZXNxbDovL2FpcmZsb3dhZG1pbjpzdXBlcnNlY3JldHBhc3N3b3JkQGFpcmZsb3ctcG9zdGdyZXMuY3dnc2c1c3dva3oxLnVzLXdlc3QtMi5yZHMuYW1hem9uYXdzLmNvbTo1NDMyL2FpcmZsb3c= 30 | # sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93OjU0MzIvYWlyZmxvdwo= 31 | -------------------------------------------------------------------------------- /scripts/kube/volumes.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. 
* 17 | 18 | # The backing volume can be anything you want, it just needs to be `ReadWriteOnce` 19 | # I'm using hostPath since minikube is nice for testing, but any (non-local) volume will work on a real cluster 20 | # apiVersion: storage.k8s.io/v1beta1 21 | apiVersion: storage.k8s.io/v1beta1 22 | kind: CSIDriver 23 | metadata: 24 | name: efs.csi.aws.com 25 | spec: 26 | attachRequired: false 27 | --- 28 | kind: StorageClass 29 | apiVersion: storage.k8s.io/v1 30 | metadata: 31 | name: efs-sc 32 | provisioner: efs.csi.aws.com 33 | --- 34 | apiVersion: v1 35 | kind: PersistentVolume 36 | metadata: 37 | name: airflow-efs-pv 38 | spec: 39 | capacity: 40 | storage: 100Gi 41 | volumeMode: Filesystem 42 | accessModes: 43 | - ReadWriteMany 44 | persistentVolumeReclaimPolicy: Retain 45 | storageClassName: efs-sc 46 | csi: 47 | driver: efs.csi.aws.com 48 | volumeHandle: fs-c77450c3::fsap-066dac7faa8e111f0 49 | --- 50 | apiVersion: v1 51 | kind: PersistentVolumeClaim 52 | metadata: 53 | name: airflow-efs-pvc 54 | namespace: airflow 55 | spec: 56 | accessModes: 57 | - ReadWriteMany 58 | storageClassName: efs-sc 59 | resources: 60 | requests: 61 | storage: 100Gi 62 | -------------------------------------------------------------------------------- /scripts/kube/templates/volumes.template.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. 
* 17 | 18 | # The backing volume can be anything you want, it just needs to be `ReadWriteOnce` 19 | # I'm using hostPath since minikube is nice for testing, but any (non-local) volume will work on a real cluster 20 | # apiVersion: storage.k8s.io/v1beta1 21 | apiVersion: storage.k8s.io/v1beta1 22 | kind: CSIDriver 23 | metadata: 24 | name: efs.csi.aws.com 25 | spec: 26 | attachRequired: false 27 | --- 28 | kind: StorageClass 29 | apiVersion: storage.k8s.io/v1 30 | metadata: 31 | name: efs-sc 32 | provisioner: efs.csi.aws.com 33 | --- 34 | apiVersion: v1 35 | kind: PersistentVolume 36 | metadata: 37 | name: airflow-efs-pv 38 | spec: 39 | capacity: 40 | storage: 100Gi 41 | volumeMode: Filesystem 42 | accessModes: 43 | - ReadWriteMany 44 | persistentVolumeReclaimPolicy: Retain 45 | storageClassName: efs-sc 46 | csi: 47 | driver: efs.csi.aws.com 48 | volumeHandle: {{AOK_EFS_FS_ID}}::{{AOK_EFS_AP}} 49 | --- 50 | apiVersion: v1 51 | kind: PersistentVolumeClaim 52 | metadata: 53 | name: airflow-efs-pvc 54 | namespace: airflow 55 | spec: 56 | accessModes: 57 | - ReadWriteMany 58 | storageClassName: efs-sc 59 | resources: 60 | requests: 61 | storage: 100Gi 62 | -------------------------------------------------------------------------------- /scripts/reset_env_vars.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script will perform the following: 3 | # 1. Resets all the environment variables for deploying airflow on an existing EKS cluster 4 | # This script should be run using the command ". ./reset_env_vars.sh" to preserve the environment variables. 5 | 6 | export AOK_AWS_REGION=us-west-2 #<-- Change this to match your region 7 | export AOK_ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text) 8 | export AOK_EKS_CLUSTER_NAME=Airflow-on-Kubernetes 9 | 10 | printf "Setting EFS file system...\n" 11 | export AOK_EFS_FS_ID=$(aws efs describe-file-systems \ 12 | --creation-token Airflow-on-EKS \ 13 | --region $AOK_AWS_REGION \ 14 | --output text \ 15 | --query "FileSystems[0].FileSystemId") 16 | 17 | printf "Setting EFS access point...\n" 18 | export AOK_EFS_AP=$(aws efs describe-access-points \ 19 | --file-system-id $AOK_EFS_FS_ID \ 20 | --region $AOK_AWS_REGION \ 21 | --query 'AccessPoints[0].AccessPointId' \ 22 | --output text) 23 | 24 | printf "Setting RDS security group...\n" 25 | export AOK_RDS_SG=$(aws rds describe-db-instances \ 26 | --db-instance-identifier airflow-postgres \ 27 | --region $AOK_AWS_REGION \ 28 | --query "DBInstances[].VpcSecurityGroups[].VpcSecurityGroupId" \ 29 | --output text) 30 | 31 | printf "Setting RDS endpoint....\n" 32 | export AOK_RDS_ENDPOINT=$(aws rds describe-db-instances \ 33 | --db-instance-identifier airflow-postgres \ 34 | --query 'DBInstances[0].Endpoint.Address' \ 35 | --region $AOK_AWS_REGION \ 36 | --output text) 37 | 38 | printf "Setting an SQL connection string....\n" 39 | 40 | _UNAME_OUT=$(uname -s) 41 | case "${_UNAME_OUT}" in 42 | Linux*) _MY_OS=linux;; 43 | Darwin*) _MY_OS=darwin;; 44 | *) echo "${_UNAME_OUT} is unsupported." 45 | exit 1;; 46 | esac 47 | echo "Local OS is ${_MY_OS}" 48 | 49 | case $_MY_OS in 50 | linux) 51 | export AOK_SQL_ALCHEMY_CONN=$(echo -n postgresql://airflowadmin:supersecretpassword@${AOK_RDS_ENDPOINT}:5432/airflow | base64 -w 0) 52 | ;; 53 | darwin) 54 | export AOK_SQL_ALCHEMY_CONN=$(echo -n postgresql://airflowadmin:supersecretpassword@${AOK_RDS_ENDPOINT}:5432/airflow | base64) 55 | ;; 56 | *) 57 | echo "${_UNAME_OUT} is unsupported." 
58 | exit 1 59 | ;; 60 | esac 61 | 62 | export AOK_AIRFLOW_REPOSITORY=$(aws ecr describe-repositories \ 63 | --repository-name airflow-eks-demo \ 64 | --region $AOK_AWS_REGION \ 65 | --query 'repositories[0].repositoryUri' \ 66 | --output text) -------------------------------------------------------------------------------- /scripts/kube/templates/airflow.template.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | 18 | # Note: The airflow image used in this example is obtained by * 19 | # building the image from the local docker subdirectory. * 20 | --- 21 | apiVersion: v1 22 | kind: ServiceAccount 23 | metadata: 24 | name: airflow 25 | namespace: airflow 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1 28 | kind: Role 29 | metadata: 30 | namespace: airflow 31 | name: airflow 32 | rules: 33 | - apiGroups: [""] # "" indicates the core API group 34 | resources: ["pods"] 35 | verbs: ["get", "list", "watch", "create", "update", "delete"] 36 | - apiGroups: ["batch", "extensions"] 37 | resources: ["jobs"] 38 | verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] 39 | --- 40 | apiVersion: rbac.authorization.k8s.io/v1 41 | kind: RoleBinding 42 | metadata: 43 | name: airflow 44 | namespace: airflow 45 | subjects: 46 | - kind: ServiceAccount 47 | name: airflow # Name of the ServiceAccount 48 | namespace: airflow 49 | roleRef: 50 | kind: Role # This must be Role or ClusterRole 51 | name: airflow # This must match the name of the Role 52 | # or ClusterRole you wish to bind to 53 | apiGroup: rbac.authorization.k8s.io 54 | --- 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | metadata: 58 | name: airflow 59 | namespace: airflow 60 | spec: 61 | replicas: 1 62 | selector: 63 | matchLabels: 64 | name: airflow 65 | template: 66 | metadata: 67 | labels: 68 | name: airflow 69 | spec: 70 | serviceAccountName: airflow 71 | affinity: 72 | nodeAffinity: 73 | requiredDuringSchedulingIgnoredDuringExecution: 74 | nodeSelectorTerms: 75 | - matchExpressions: 76 | - key: lifecycle 77 | operator: NotIn 78 | values: 79 | - Ec2Spot 80 | initContainers: 81 | - name: "init" 82 | image: {{AIRFLOW_IMAGE}}:{{AIRFLOW_TAG}} 83 | imagePullPolicy: Always 84 | volumeMounts: 85 | - name: airflow-configmap 86 | mountPath: /root/airflow/airflow.cfg 87 | subPath: airflow.cfg 88 | - name: {{INIT_DAGS_VOLUME_NAME}} 89 | mountPath: /root/airflow/dags 90 | env: 91 | - name: SQL_ALCHEMY_CONN 92 | valueFrom: 93 | secretKeyRef: 94 | name: airflow-secrets 95 | key: sql_alchemy_conn 96 | command: 97 | - "bash" 98 | args: 99 | - "-cx" 100 | - "./tmp/airflow-test-env-init.sh {{INIT_GIT_SYNC}}" 101 | containers: 102 
| - name: webserver 103 | image: {{AIRFLOW_IMAGE}}:{{AIRFLOW_TAG}} 104 | imagePullPolicy: Always 105 | ports: 106 | - name: webserver 107 | containerPort: 8080 108 | args: ["webserver"] 109 | env: 110 | - name: AIRFLOW_KUBE_NAMESPACE 111 | valueFrom: 112 | fieldRef: 113 | fieldPath: metadata.namespace 114 | - name: SQL_ALCHEMY_CONN 115 | valueFrom: 116 | secretKeyRef: 117 | name: airflow-secrets 118 | key: sql_alchemy_conn 119 | volumeMounts: 120 | - name: airflow-configmap 121 | mountPath: /root/airflow/airflow.cfg 122 | subPath: airflow.cfg 123 | - name: {{POD_AIRFLOW_VOLUME_NAME}} 124 | mountPath: /root/airflow/dags 125 | - name: {{POD_AIRFLOW_VOLUME_NAME}} 126 | mountPath: /root/airflow/logs 127 | - name: scheduler 128 | image: {{AIRFLOW_IMAGE}}:{{AIRFLOW_TAG}} 129 | imagePullPolicy: Always 130 | args: ["scheduler"] 131 | env: 132 | - name: AIRFLOW_KUBE_NAMESPACE 133 | valueFrom: 134 | fieldRef: 135 | fieldPath: metadata.namespace 136 | - name: SQL_ALCHEMY_CONN 137 | valueFrom: 138 | secretKeyRef: 139 | name: airflow-secrets 140 | key: sql_alchemy_conn 141 | volumeMounts: 142 | - name: airflow-configmap 143 | mountPath: /root/airflow/airflow.cfg 144 | subPath: airflow.cfg 145 | - name: {{POD_AIRFLOW_VOLUME_NAME}} 146 | mountPath: /root/airflow/dags 147 | - name: {{POD_AIRFLOW_VOLUME_NAME}} 148 | mountPath: /root/airflow/logs 149 | volumes: 150 | - name: airflow-dags 151 | persistentVolumeClaim: 152 | claimName: airflow-efs-pvc 153 | - name: airflow-dags-fake 154 | emptyDir: {} 155 | - name: airflow-dags-git 156 | emptyDir: {} 157 | - name: airflow-configmap 158 | configMap: 159 | name: airflow-configmap 160 | --- 161 | apiVersion: v1 162 | kind: Service 163 | metadata: 164 | name: airflow 165 | namespace: airflow 166 | spec: 167 | type: LoadBalancer 168 | ports: 169 | - port: 8080 170 | selector: 171 | name: airflow -------------------------------------------------------------------------------- /scripts/kube/build/airflow.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | 18 | # Note: The airflow image used in this example is obtained by * 19 | # building the image from the local docker subdirectory. 
* 20 | --- 21 | apiVersion: v1 22 | kind: ServiceAccount 23 | metadata: 24 | name: airflow 25 | namespace: airflow 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1 28 | kind: Role 29 | metadata: 30 | namespace: airflow 31 | name: airflow 32 | rules: 33 | - apiGroups: [""] # "" indicates the core API group 34 | resources: ["pods"] 35 | verbs: ["get", "list", "watch", "create", "update", "delete"] 36 | - apiGroups: ["batch", "extensions"] 37 | resources: ["jobs"] 38 | verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] 39 | --- 40 | apiVersion: rbac.authorization.k8s.io/v1 41 | kind: RoleBinding 42 | metadata: 43 | name: airflow 44 | namespace: airflow 45 | subjects: 46 | - kind: ServiceAccount 47 | name: airflow # Name of the ServiceAccount 48 | namespace: airflow 49 | roleRef: 50 | kind: Role # This must be Role or ClusterRole 51 | name: airflow # This must match the name of the Role 52 | # or ClusterRole you wish to bind to 53 | apiGroup: rbac.authorization.k8s.io 54 | --- 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | metadata: 58 | name: airflow 59 | namespace: airflow 60 | spec: 61 | replicas: 1 62 | selector: 63 | matchLabels: 64 | name: airflow 65 | template: 66 | metadata: 67 | labels: 68 | name: airflow 69 | spec: 70 | serviceAccountName: airflow 71 | affinity: 72 | nodeAffinity: 73 | requiredDuringSchedulingIgnoredDuringExecution: 74 | nodeSelectorTerms: 75 | - matchExpressions: 76 | - key: lifecycle 77 | operator: NotIn 78 | values: 79 | - Ec2Spot 80 | initContainers: 81 | - name: "init" 82 | image: 542082525742.dkr.ecr.us-west-2.amazonaws.com/airflow-eks-demo:latest 83 | imagePullPolicy: Always 84 | volumeMounts: 85 | - name: airflow-configmap 86 | mountPath: /root/airflow/airflow.cfg 87 | subPath: airflow.cfg 88 | - name: airflow-dags 89 | mountPath: /root/airflow/dags 90 | env: 91 | - name: SQL_ALCHEMY_CONN 92 | valueFrom: 93 | secretKeyRef: 94 | name: airflow-secrets 95 | key: sql_alchemy_conn 96 | command: 97 | - "bash" 98 | args: 99 | - "-cx" 100 | - "./tmp/airflow-test-env-init.sh " 101 | containers: 102 | - name: webserver 103 | image: 542082525742.dkr.ecr.us-west-2.amazonaws.com/airflow-eks-demo:latest 104 | imagePullPolicy: Always 105 | ports: 106 | - name: webserver 107 | containerPort: 8080 108 | args: ["webserver"] 109 | env: 110 | - name: AIRFLOW_KUBE_NAMESPACE 111 | valueFrom: 112 | fieldRef: 113 | fieldPath: metadata.namespace 114 | - name: SQL_ALCHEMY_CONN 115 | valueFrom: 116 | secretKeyRef: 117 | name: airflow-secrets 118 | key: sql_alchemy_conn 119 | volumeMounts: 120 | - name: airflow-configmap 121 | mountPath: /root/airflow/airflow.cfg 122 | subPath: airflow.cfg 123 | - name: airflow-dags 124 | mountPath: /root/airflow/dags 125 | - name: airflow-dags 126 | mountPath: /root/airflow/logs 127 | - name: scheduler 128 | image: 542082525742.dkr.ecr.us-west-2.amazonaws.com/airflow-eks-demo:latest 129 | imagePullPolicy: Always 130 | args: ["scheduler"] 131 | env: 132 | - name: AIRFLOW_KUBE_NAMESPACE 133 | valueFrom: 134 | fieldRef: 135 | fieldPath: metadata.namespace 136 | - name: SQL_ALCHEMY_CONN 137 | valueFrom: 138 | secretKeyRef: 139 | name: airflow-secrets 140 | key: sql_alchemy_conn 141 | volumeMounts: 142 | - name: airflow-configmap 143 | mountPath: /root/airflow/airflow.cfg 144 | subPath: airflow.cfg 145 | - name: airflow-dags 146 | mountPath: /root/airflow/dags 147 | - name: airflow-dags 148 | mountPath: /root/airflow/logs 149 | volumes: 150 | - name: airflow-dags 151 | persistentVolumeClaim: 152 | claimName: airflow-efs-pvc 
153 | - name: airflow-dags-fake 154 | emptyDir: {} 155 | - name: airflow-dags-git 156 | emptyDir: {} 157 | - name: airflow-configmap 158 | configMap: 159 | name: airflow-configmap 160 | --- 161 | apiVersion: v1 162 | kind: Service 163 | metadata: 164 | name: airflow 165 | namespace: airflow 166 | spec: 167 | type: LoadBalancer 168 | ports: 169 | - port: 8080 170 | selector: 171 | name: airflow -------------------------------------------------------------------------------- /scripts/kube/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one * 4 | # or more contributor license agreements. See the NOTICE file * 5 | # distributed with this work for additional information * 6 | # regarding copyright ownership. The ASF licenses this file * 7 | # to you under the Apache License, Version 2.0 (the * 8 | # "License"); you may not use this file except in compliance * 9 | # with the License. You may obtain a copy of the License at * 10 | # * 11 | # http://www.apache.org/licenses/LICENSE-2.0 * 12 | # * 13 | # Unless required by applicable law or agreed to in writing, * 14 | # software distributed under the License is distributed on an * 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 16 | # KIND, either express or implied. See the License for the * 17 | # specific language governing permissions and limitations * 18 | # under the License. * 19 | 20 | set -x 21 | 22 | echo "Airflow Image Repo" $AOK_AIRFLOW_REPOSITORY 23 | echo "EFS File System ID" $AOK_EFS_FS_ID 24 | echo "EFS Access Point" $AOK_EFS_AP 25 | echo "RDS SQL Connection String" $AOK_SQL_ALCHEMY_CONN 26 | 27 | if [ -z "$AOK_AIRFLOW_REPOSITORY" ]; then 28 | echo "\AOK_AIRFLOW_REPOSITORY environement variable is empty." 29 | exit 1 30 | fi 31 | if [ -z "$AOK_EFS_FS_ID" ]; then 32 | echo "\AOK_EFS_FS_ID environement variable is empty." 33 | exit 1 34 | fi 35 | if [ -z "$AOK_EFS_AP" ]; then 36 | echo "\AOK_EFS_AP environement variable is empty." 37 | exit 1 38 | fi 39 | if [ -z "$AOK_SQL_ALCHEMY_CONN" ]; then 40 | echo "\AOK_SQL_ALCHEMY_CONN environement variable is empty." 41 | exit 1 42 | fi 43 | 44 | AIRFLOW_IMAGE=$AOK_AIRFLOW_REPOSITORY 45 | AIRFLOW_TAG=latest 46 | DIRNAME=$(cd "$(dirname "$0")"; pwd) 47 | TEMPLATE_DIRNAME=${DIRNAME}/templates 48 | BUILD_DIRNAME=${DIRNAME}/build 49 | 50 | if [ ! -d "$BUILD_DIRNAME" ]; then 51 | mkdir -p ${BUILD_DIRNAME} 52 | fi 53 | 54 | rm -f ${BUILD_DIRNAME}/* 55 | 56 | 57 | INIT_DAGS_VOLUME_NAME=airflow-dags 58 | POD_AIRFLOW_VOLUME_NAME=airflow-dags 59 | CONFIGMAP_DAGS_FOLDER=/root/airflow/dags 60 | CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT= 61 | CONFIGMAP_DAGS_VOLUME_CLAIM=airflow-efs-pvc 62 | 63 | CONFIGMAP_GIT_REPO=${TRAVIS_REPO_SLUG:-apache/airflow} 64 | CONFIGMAP_BRANCH=${TRAVIS_BRANCH:-master} 65 | 66 | _UNAME_OUT=$(uname -s) 67 | case "${_UNAME_OUT}" in 68 | Linux*) _MY_OS=linux;; 69 | Darwin*) _MY_OS=darwin;; 70 | *) echo "${_UNAME_OUT} is unsupported." 71 | exit 1;; 72 | esac 73 | echo "Local OS is ${_MY_OS}" 74 | 75 | case $_MY_OS in 76 | linux) 77 | SED_COMMAND=sed 78 | ;; 79 | darwin) 80 | SED_COMMAND=gsed 81 | if ! $(type "$SED_COMMAND" &> /dev/null) ; then 82 | echo "Could not find \"$SED_COMMAND\" binary, please install it. On OSX brew install gnu-sed" >&2 83 | exit 1 84 | fi 85 | ;; 86 | *) 87 | echo "${_UNAME_OUT} is unsupported." 
88 | exit 1 89 | ;; 90 | esac 91 | 92 | ${SED_COMMAND} -e "s/{{INIT_GIT_SYNC}}//g" \ 93 | ${TEMPLATE_DIRNAME}/airflow.template.yaml > ${BUILD_DIRNAME}/airflow.yaml 94 | ${SED_COMMAND} -i "s|{{AIRFLOW_IMAGE}}|$AIRFLOW_IMAGE|g" ${BUILD_DIRNAME}/airflow.yaml 95 | ${SED_COMMAND} -i "s|{{AIRFLOW_TAG}}|$AIRFLOW_TAG|g" ${BUILD_DIRNAME}/airflow.yaml 96 | 97 | ${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|$CONFIGMAP_GIT_REPO|g" ${BUILD_DIRNAME}/airflow.yaml 98 | ${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|$CONFIGMAP_BRANCH|g" ${BUILD_DIRNAME}/airflow.yaml 99 | ${SED_COMMAND} -i "s|{{INIT_DAGS_VOLUME_NAME}}|$INIT_DAGS_VOLUME_NAME|g" ${BUILD_DIRNAME}/airflow.yaml 100 | ${SED_COMMAND} -i "s|{{POD_AIRFLOW_VOLUME_NAME}}|$POD_AIRFLOW_VOLUME_NAME|g" ${BUILD_DIRNAME}/airflow.yaml 101 | 102 | ${SED_COMMAND} "s|{{CONFIGMAP_DAGS_FOLDER}}|$CONFIGMAP_DAGS_FOLDER|g" \ 103 | ${TEMPLATE_DIRNAME}/configmaps.template.yaml > ${BUILD_DIRNAME}/configmaps.yaml 104 | ${SED_COMMAND} -i "s|{{AIRFLOW_IMAGE}}|$AIRFLOW_IMAGE|g" ${BUILD_DIRNAME}/configmaps.yaml 105 | ${SED_COMMAND} -i "s|{{AIRFLOW_TAG}}|$AIRFLOW_TAG|g" ${BUILD_DIRNAME}/configmaps.yaml 106 | ${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|$CONFIGMAP_GIT_REPO|g" ${BUILD_DIRNAME}/configmaps.yaml 107 | ${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|$CONFIGMAP_BRANCH|g" ${BUILD_DIRNAME}/configmaps.yaml 108 | ${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}|$CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT|g" ${BUILD_DIRNAME}/configmaps.yaml 109 | ${SED_COMMAND} -i "s|{{CONFIGMAP_DAGS_VOLUME_CLAIM}}|$CONFIGMAP_DAGS_VOLUME_CLAIM|g" ${BUILD_DIRNAME}/configmaps.yaml 110 | ${SED_COMMAND} "s|{{AOK_EFS_FS_ID}}|$AOK_EFS_FS_ID|g" \ 111 | ${TEMPLATE_DIRNAME}/volumes.template.yaml > ${DIRNAME}/volumes.yaml 112 | ${SED_COMMAND} -i "s|{{AOK_EFS_AP}}|$AOK_EFS_AP|g" ${DIRNAME}/volumes.yaml 113 | ${SED_COMMAND} "s|{{AOK_SQL_ALCHEMY_CONN}}|$AOK_SQL_ALCHEMY_CONN|g" \ 114 | ${TEMPLATE_DIRNAME}/secrets.template.yaml > ${DIRNAME}/secrets.yaml 115 | 116 | cat ${BUILD_DIRNAME}/airflow.yaml 117 | cat ${BUILD_DIRNAME}/configmaps.yaml 118 | cat ${DIRNAME}/volumes.yaml 119 | cat ${DIRNAME}/secrets.yaml 120 | 121 | # Fix file permissions 122 | if [[ "${TRAVIS}" == true ]]; then 123 | sudo chown -R travis.travis $HOME/.kube $HOME/.minikube 124 | fi 125 | 126 | NAMESPACE_AVAILABLE=$(kubectl get namespace airflow|wc -l | xargs) 127 | 128 | echo $NAMESPACE_AVAILABLE 129 | 130 | if [ "$NAMESPACE_AVAILABLE" -gt "0" ]; then 131 | kubectl delete -f $DIRNAME/namespace.yaml 132 | kubectl delete -f $DIRNAME/volumes.yaml 133 | fi 134 | 135 | 136 | case $_MY_OS in 137 | linux) 138 | sleep 1m 139 | ;; 140 | darwin) 141 | sleep 60 142 | ;; 143 | *) 144 | echo "${_UNAME_OUT} is unsupported." 
145 | exit 1 146 | ;; 147 | esac 148 | set -e 149 | 150 | 151 | kubectl apply -f $DIRNAME/namespace.yaml 152 | kubectl apply -f $DIRNAME/secrets.yaml 153 | kubectl apply -f $BUILD_DIRNAME/configmaps.yaml 154 | kubectl apply -f $DIRNAME/volumes.yaml 155 | kubectl apply -f $BUILD_DIRNAME/airflow.yaml 156 | 157 | 158 | # wait for up to 10 minutes for everything to be deployed 159 | PODS_ARE_READY=0 160 | for i in {1..150} 161 | do 162 | echo "------- Running kubectl get pods -------" 163 | PODS=$(kubectl get pods -n airflow| awk 'NR>1 {print $0}') 164 | echo "$PODS" 165 | NUM_AIRFLOW_READY=$(echo $PODS | grep airflow | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) 166 | # NUM_POSTGRES_READY=$(echo $PODS | grep postgres | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) 167 | if [ "$NUM_AIRFLOW_READY" == "1" ]; then 168 | PODS_ARE_READY=1 169 | break 170 | fi 171 | sleep 4 172 | done 173 | 174 | if [ "$PODS_ARE_READY" == 1 ]; then 175 | echo "PODS are ready." 176 | else 177 | echo "PODS are not ready after waiting for a long time. Exiting..." 178 | exit 1 179 | fi 180 | -------------------------------------------------------------------------------- /scripts/setup_infra.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script will perform the following: 3 | # 1. Deploy Kubernetes Cluster Autoscaler. 4 | # 2. Deploy the EFS CSI driver and create EFS filesystem and Access Point. 5 | # 3. Deploy an Amazon RDS PostgreSQL database. 6 | # 4. Creates an ECR Repository for holding Airflow Docker Image. 7 | # This script should be run using the command ". ./setup_infra.sh" to preserve the environment variables. 8 | 9 | # Prerequisites: 10 | # - AWS Profile should be setup on the executing shell. 11 | # - Environment variables AOK_AWS_REGION, AOK_EKS_CLUSTER_NAME should be set. 
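# A minimal sketch of exporting the prerequisite variables before sourcing this
# script. The values shown are illustrative and match the defaults used in
# reset_env_vars.sh; adjust the region and cluster name to your own environment.
# AOK_ACCOUNT_ID is also referenced later in this script when attaching the IAM
# policy for the cluster autoscaler, so it is included here as well:
#
#   export AOK_AWS_REGION=us-west-2
#   export AOK_ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text)
#   export AOK_EKS_CLUSTER_NAME=Airflow-on-Kubernetes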
12 | 13 | 14 | printf "Deploy Kubernetes Cluster Autoscaler.\n" 15 | 16 | printf "Associating OIDC provider with the EKS cluster...\n" 17 | 18 | eksctl utils associate-iam-oidc-provider \ 19 | --region $AOK_AWS_REGION \ 20 | --cluster $AOK_EKS_CLUSTER_NAME\ 21 | --approve 22 | 23 | printf "Creating an IAM policy document for cluster autoscaler...\n" 24 | cat << EOF > cluster-autoscaler-policy.json 25 | { 26 | "Version": "2012-10-17", 27 | "Statement": [ 28 | { 29 | "Action": [ 30 | "autoscaling:DescribeAutoScalingGroups", 31 | "autoscaling:DescribeAutoScalingInstances", 32 | "autoscaling:DescribeLaunchConfigurations", 33 | "autoscaling:DescribeTags", 34 | "autoscaling:SetDesiredCapacity", 35 | "autoscaling:TerminateInstanceInAutoScalingGroup", 36 | "ec2:DescribeLaunchTemplateVersions" 37 | ], 38 | "Resource": "*", 39 | "Effect": "Allow" 40 | } 41 | ] 42 | } 43 | EOF 44 | 45 | printf "Creating the IAM policy...\n" 46 | export AOK_AmazonEKSClusterAutoscalerPolicy=$(aws iam create-policy \ 47 | --policy-name AmazonEKSClusterAutoscalerPolicy \ 48 | --policy-document file://cluster-autoscaler-policy.json) 49 | 50 | printf "Creating service account for cluster autoscaler...\n" 51 | eksctl create iamserviceaccount \ 52 | --cluster=$AOK_EKS_CLUSTER_NAME \ 53 | --namespace=kube-system \ 54 | --name=cluster-autoscaler \ 55 | --attach-policy-arn=arn:aws:iam::$AOK_ACCOUNT_ID:policy/AmazonEKSClusterAutoscalerPolicy \ 56 | --override-existing-serviceaccounts \ 57 | --region $AOK_AWS_REGION \ 58 | --approve 59 | 60 | printf "Adding cluster autoscaler helm repo....\n" 61 | helm repo add autoscaler https://kubernetes.github.io/autoscaler 62 | helm repo update 63 | 64 | printf "Installing cluster autoscaler....\n" 65 | helm install cluster-autoscaler \ 66 | autoscaler/cluster-autoscaler \ 67 | --namespace kube-system \ 68 | --set 'autoDiscovery.clusterName'=$AOK_EKS_CLUSTER_NAME \ 69 | --set awsRegion=$AOK_AWS_REGION \ 70 | --set cloud-provider=aws \ 71 | --set extraArgs.balance-similar-node-groups=true \ 72 | --set extraArgs.skip-nodes-with-system-pods=true \ 73 | --set rbac.serviceAccount.create=false \ 74 | --set rbac.serviceAccount.name=cluster-autoscaler 75 | 76 | 77 | 78 | printf "Deploy the EFS CSI driver and create EFS filesystem and Access Point.\n" 79 | 80 | printf "Deploying EFS Driver...\n" 81 | helm repo add aws-efs-csi-driver https://kubernetes-sigs.github.io/aws-efs-csi-driver/ 82 | helm repo update 83 | helm upgrade --install aws-efs-csi-driver \ 84 | aws-efs-csi-driver/aws-efs-csi-driver \ 85 | --namespace kube-system 86 | 87 | printf "Getting the VPC of the EKS cluster and its CIDR block...\n" 88 | export AOK_VPC_ID=$(aws eks describe-cluster --name $AOK_EKS_CLUSTER_NAME \ 89 | --region $AOK_AWS_REGION \ 90 | --query "cluster.resourcesVpcConfig.vpcId" \ 91 | --output text) 92 | export AOK_CIDR_BLOCK=$(aws ec2 describe-vpcs --vpc-ids $AOK_VPC_ID \ 93 | --query "Vpcs[].CidrBlock" \ 94 | --region $AOK_AWS_REGION \ 95 | --output text) 96 | 97 | printf "Creating a security group for EFS, and allow inbound NFS traffic (port 2049):...\n" 98 | export AOK_EFS_SG_ID=$(aws ec2 create-security-group \ 99 | --region $AOK_AWS_REGION \ 100 | --description Airflow-on-EKS \ 101 | --group-name Airflow-on-EKS \ 102 | --vpc-id $AOK_VPC_ID \ 103 | --query 'GroupId' \ 104 | --output text) 105 | 106 | aws ec2 authorize-security-group-ingress \ 107 | --group-id $AOK_EFS_SG_ID \ 108 | --protocol tcp \ 109 | --port 2049 \ 110 | --cidr $AOK_CIDR_BLOCK \ 111 | --region $AOK_AWS_REGION 112 | 113 | printf "Creating an 
EFS file system...\n" 114 | export AOK_EFS_FS_ID=$(aws efs create-file-system \ 115 | --creation-token Airflow-on-EKS \ 116 | --performance-mode generalPurpose \ 117 | --throughput-mode bursting \ 118 | --region $AOK_AWS_REGION \ 119 | --tags Key=Name,Value=AirflowVolume \ 120 | --encrypted \ 121 | --output text \ 122 | --query "FileSystemId") 123 | 124 | printf "Waiting for 10 seconds...\n" 125 | sleep 10 126 | 127 | printf "Creating EFS mount targets in each subnet attached to on-demand nodes...\n" 128 | for subnet in $(aws eks describe-nodegroup \ 129 | --cluster-name $AOK_EKS_CLUSTER_NAME \ 130 | --nodegroup-name ng-on-demand \ 131 | --region $AOK_AWS_REGION \ 132 | --output text \ 133 | --query "nodegroup.subnets"); \ 134 | do (aws efs create-mount-target \ 135 | --file-system-id $AOK_EFS_FS_ID \ 136 | --subnet-id $subnet \ 137 | --security-group $AOK_EFS_SG_ID \ 138 | --region $AOK_AWS_REGION); \ 139 | done 140 | 141 | printf "Creating an EFS access point...\n" 142 | export AOK_EFS_AP=$(aws efs create-access-point \ 143 | --file-system-id $AOK_EFS_FS_ID \ 144 | --posix-user Uid=1000,Gid=1000 \ 145 | --root-directory "Path=/airflow,CreationInfo={OwnerUid=1000,OwnerGid=1000,Permissions=777}" \ 146 | --region $AOK_AWS_REGION \ 147 | --query 'AccessPointId' \ 148 | --output text) 149 | 150 | printf "Deploy an Amazon RDS PostgreSQL database.\n" 151 | 152 | printf "Obtaining the list of Private Subnets in Env variables...\n" 153 | export AOK_PRIVATE_SUBNETS=$(aws eks describe-nodegroup \ 154 | --cluster-name $AOK_EKS_CLUSTER_NAME \ 155 | --nodegroup-name ng-on-demand \ 156 | --region $AOK_AWS_REGION \ 157 | --output text \ 158 | --query "nodegroup.subnets" | awk -v OFS="," '{for(i=1;i<=NF;i++)if($i~/subnet/)$i="\"" $i "\"";$1=$1}1') 159 | 160 | printf "Creating a DB Subnet group...\n" 161 | aws rds create-db-subnet-group \ 162 | --db-subnet-group-name airflow-postgres-subnet \ 163 | --subnet-ids "[$AOK_PRIVATE_SUBNETS]" \ 164 | --db-subnet-group-description "Subnet group for Postgres RDS" \ 165 | --region $AOK_AWS_REGION 166 | 167 | printf "Creating the RDS Postgres Instance...\n" 168 | aws rds create-db-instance \ 169 | --db-instance-identifier airflow-postgres \ 170 | --db-instance-class db.m4.large \ 171 | --db-name airflow \ 172 | --db-subnet-group-name airflow-postgres-subnet \ 173 | --engine postgres \ 174 | --master-username airflowadmin \ 175 | --master-user-password supersecretpassword \ 176 | --allocated-storage 20 \ 177 | --no-publicly-accessible \ 178 | --region $AOK_AWS_REGION 179 | 180 | 181 | printf "Creating RDS security group...\n" 182 | export AOK_RDS_SG=$(aws rds describe-db-instances \ 183 | --db-instance-identifier airflow-postgres \ 184 | --region $AOK_AWS_REGION \ 185 | --query "DBInstances[].VpcSecurityGroups[].VpcSecurityGroupId" \ 186 | --output text) 187 | 188 | printf "Authorizing traffic...\n" 189 | aws ec2 authorize-security-group-ingress \ 190 | --group-id $AOK_RDS_SG \ 191 | --cidr $AOK_CIDR_BLOCK \ 192 | --port 5432 \ 193 | --protocol tcp \ 194 | --region $AOK_AWS_REGION 195 | 196 | printf "Waiting for 5 minutes...\n" 197 | sleep 300 198 | 199 | printf "Checking if the RDS Instance is up ....\n" 200 | aws rds describe-db-instances \ 201 | --db-instance-identifier airflow-postgres \ 202 | --region $AOK_AWS_REGION \ 203 | --query "DBInstances[].DBInstanceStatus" 204 | 205 | printf "Creating an RDS endpoint....\n" 206 | export AOK_RDS_ENDPOINT=$(aws rds describe-db-instances \ 207 | --db-instance-identifier airflow-postgres \ 208 | --query 
'DBInstances[0].Endpoint.Address' \ 209 | --region $AOK_AWS_REGION \ 210 | --output text) 211 | 212 | printf "Creating an SQL connection string....\n" 213 | 214 | _UNAME_OUT=$(uname -s) 215 | case "${_UNAME_OUT}" in 216 | Linux*) _MY_OS=linux;; 217 | Darwin*) _MY_OS=darwin;; 218 | *) echo "${_UNAME_OUT} is unsupported." 219 | exit 1;; 220 | esac 221 | echo "Local OS is ${_MY_OS}" 222 | 223 | case $_MY_OS in 224 | linux) 225 | export AOK_SQL_ALCHEMY_CONN=$(echo -n postgresql://airflowadmin:supersecretpassword@${AOK_RDS_ENDPOINT}:5432/airflow | base64 -w 0) 226 | ;; 227 | darwin) 228 | export AOK_SQL_ALCHEMY_CONN=$(echo -n postgresql://airflowadmin:supersecretpassword@${AOK_RDS_ENDPOINT}:5432/airflow | base64) 229 | ;; 230 | *) 231 | echo "${_UNAME_OUT} is unsupported." 232 | exit 1 233 | ;; 234 | esac 235 | 236 | export AOK_AIRFLOW_REPOSITORY=$(aws ecr create-repository \ 237 | --repository-name airflow-eks-demo \ 238 | --region $AOK_AWS_REGION \ 239 | --query 'repository.repositoryUri' \ 240 | --output text) -------------------------------------------------------------------------------- /scripts/kube/build/configmaps.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | apiVersion: v1 18 | kind: ConfigMap 19 | metadata: 20 | name: airflow-configmap 21 | namespace: airflow 22 | data: 23 | airflow.cfg: | 24 | [core] 25 | dags_folder = /root/airflow/dags 26 | base_log_folder = /root/airflow/logs 27 | logging_level = INFO 28 | executor = KubernetesExecutor 29 | parallelism = 32 30 | load_examples = False 31 | plugins_folder = /root/airflow/plugins 32 | sql_alchemy_conn = $SQL_ALCHEMY_CONN 33 | 34 | [scheduler] 35 | dag_dir_list_interval = 300 36 | child_process_log_directory = /root/airflow/logs/scheduler 37 | # Task instances listen for external kill signal (when you clear tasks 38 | # from the CLI or the UI), this defines the frequency at which they should 39 | # listen (in seconds). 40 | job_heartbeat_sec = 5 41 | max_threads = 2 42 | 43 | # The scheduler constantly tries to trigger new tasks (look at the 44 | # scheduler section in the docs for more information). This defines 45 | # how often the scheduler should run (in seconds). 46 | scheduler_heartbeat_sec = 5 47 | 48 | # after how much time a new DAGs should be picked up from the filesystem 49 | min_file_process_interval = 0 50 | 51 | statsd_on = False 52 | statsd_host = localhost 53 | statsd_port = 8125 54 | statsd_prefix = airflow 55 | 56 | # How many seconds to wait between file-parsing loops to prevent the logs from being spammed. 
57 | min_file_parsing_loop_time = 1 58 | 59 | print_stats_interval = 30 60 | scheduler_zombie_task_threshold = 300 61 | max_tis_per_query = 0 62 | authenticate = False 63 | 64 | # Turn off scheduler catchup by setting this to False. 65 | # Default behavior is unchanged and 66 | # Command Line Backfills still work, but the scheduler 67 | # will not do scheduler catchup if this is False, 68 | # however it can be set on a per DAG basis in the 69 | # DAG definition (catchup) 70 | catchup_by_default = True 71 | 72 | [webserver] 73 | # The base url of your website as airflow cannot guess what domain or 74 | # cname you are using. This is used in automated emails that 75 | # airflow sends to point links to the right web server 76 | base_url = http://0.0.0.0:8080 77 | rbac=True 78 | 79 | # The ip specified when starting the web server 80 | web_server_host = 0.0.0.0 81 | 82 | # The port on which to run the web server 83 | web_server_port = 8080 84 | 85 | # Paths to the SSL certificate and key for the web server. When both are 86 | # provided SSL will be enabled. This does not change the web server port. 87 | web_server_ssl_cert = 88 | web_server_ssl_key = 89 | 90 | # Number of seconds the webserver waits before killing gunicorn master that doesn't respond 91 | web_server_master_timeout = 120 92 | 93 | # Number of seconds the gunicorn webserver waits before timing out on a worker 94 | web_server_worker_timeout = 120 95 | 96 | # Number of workers to refresh at a time. When set to 0, worker refresh is 97 | # disabled. When nonzero, airflow periodically refreshes webserver workers by 98 | # bringing up new ones and killing old ones. 99 | worker_refresh_batch_size = 1 100 | 101 | # Number of seconds to wait before refreshing a batch of workers. 102 | worker_refresh_interval = 30 103 | 104 | # Secret key used to run your flask app 105 | secret_key = temporary_key 106 | 107 | # Number of workers to run the Gunicorn web server 108 | workers = 4 109 | 110 | # The worker class gunicorn should use. Choices include 111 | # sync (default), eventlet, gevent 112 | worker_class = sync 113 | 114 | # Log files for the gunicorn webserver. '-' means log to stderr. 115 | access_logfile = - 116 | error_logfile = - 117 | 118 | # Expose the configuration file in the web server 119 | expose_config = False 120 | 121 | # Default DAG view. Valid values are: 122 | # tree, graph, duration, gantt, landing_times 123 | dag_default_view = tree 124 | 125 | # Default DAG orientation. Valid values are: 126 | # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) 127 | dag_orientation = LR 128 | 129 | # Puts the webserver in demonstration mode; blurs the names of Operators for 130 | # privacy. 131 | demo_mode = False 132 | 133 | # The amount of time (in secs) webserver will wait for initial handshake 134 | # while fetching logs from other worker machine 135 | log_fetch_timeout_sec = 5 136 | 137 | # By default, the webserver shows paused DAGs. 
Flip this to hide paused 138 | # DAGs by default 139 | hide_paused_dags_by_default = False 140 | 141 | # Consistent page size across all listing views in the UI 142 | page_size = 100 143 | 144 | [smtp] 145 | # If you want airflow to send emails on retries, failure, and you want to use 146 | # the airflow.utils.email.send_email_smtp function, you have to configure an 147 | # smtp server here 148 | smtp_host = localhost 149 | smtp_starttls = True 150 | smtp_ssl = False 151 | # Uncomment and set the user/pass settings if you want to use SMTP AUTH 152 | # smtp_user = airflow 153 | # smtp_password = airflow 154 | smtp_port = 25 155 | smtp_mail_from = airflow@example.com 156 | 157 | [kubernetes] 158 | airflow_configmap = airflow-configmap 159 | worker_container_repository = 542082525742.dkr.ecr.us-west-2.amazonaws.com/airflow-eks-demo 160 | worker_container_tag = latest 161 | worker_container_image_pull_policy = Always 162 | worker_service_account_name = airflow 163 | namespace = airflow 164 | delete_worker_pods = True 165 | dags_in_image = False 166 | git_repo = https://github.com/apache/airflow.git 167 | git_branch = master 168 | git_subpath = airflow/contrib/example_dags/ 169 | git_user = 170 | git_password = 171 | git_sync_root = /git 172 | git_sync_path = repo 173 | git_dags_folder_mount_point = 174 | dags_volume_claim = airflow-efs-pvc 175 | dags_volume_subpath = 176 | logs_volume_claim = 177 | logs_volume_subpath = 178 | dags_volume_host = 179 | logs_volume_host = 180 | in_cluster = True 181 | gcp_service_account_keys = 182 | 183 | # Example affinity and toleration definitions. 184 | affinity = {"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"weight":1,"preference":[{"matchExpressions":[{"key":"lifecycle","operator":"In","value":"Ec2Spot"}]}],"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}} 185 | tolerations = [{ "key": "spotInstance", "operator": "Equal", "value": "true", "effect": "PreferNoSchedule" },{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }] 186 | # affinity = {"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}} 187 | # tolerations = [{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }] 188 | 189 | # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync 190 | git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 191 | git_sync_container_tag = v2.0.5 192 | git_sync_init_container_name = git-sync-clone 193 | 194 | [kubernetes_node_selectors] 195 | # The Key-value pairs to be given to worker pods. 196 | # The worker pods will be scheduled to the nodes of the specified key-value pairs. 197 | # Should be supplied in the format: key = value 198 | 199 | [kubernetes_annotations] 200 | # The Key-value annotations pairs to be given to worker pods. 
201 | # Should be supplied in the format: key = value 202 | 203 | [kubernetes_secrets] 204 | SQL_ALCHEMY_CONN = airflow-secrets=sql_alchemy_conn 205 | 206 | [hive] 207 | # Default mapreduce queue for HiveOperator tasks 208 | default_hive_mapred_queue = 209 | 210 | [celery] 211 | # This section only applies if you are using the CeleryExecutor in 212 | # [core] section above 213 | 214 | # The app name that will be used by celery 215 | celery_app_name = airflow.executors.celery_executor 216 | 217 | # The concurrency that will be used when starting workers with the 218 | # "airflow worker" command. This defines the number of task instances that 219 | # a worker will take, so size up your workers based on the resources on 220 | # your worker box and the nature of your tasks 221 | worker_concurrency = 16 222 | 223 | # When you start an airflow worker, airflow starts a tiny web server 224 | # subprocess to serve the workers local log files to the airflow main 225 | # web server, who then builds pages and sends them to users. This defines 226 | # the port on which the logs are served. It needs to be unused, and open 227 | # visible from the main web server to connect into the workers. 228 | worker_log_server_port = 8793 229 | 230 | # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally 231 | # a sqlalchemy database. Refer to the Celery documentation for more 232 | # information. 233 | # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings 234 | broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow 235 | 236 | # The Celery result_backend. When a job finishes, it needs to update the 237 | # metadata of the job. Therefore it will post a message on a message bus, 238 | # or insert it into a database (depending of the backend) 239 | # This status is used by the scheduler to update the state of the task 240 | # The use of a database is highly recommended 241 | # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings 242 | result_backend = db+mysql://airflow:airflow@localhost:3306/airflow 243 | 244 | # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start 245 | # it `airflow flower`. This defines the IP that Celery Flower runs on 246 | flower_host = 0.0.0.0 247 | 248 | # The root URL for Flower 249 | # Ex: flower_url_prefix = /flower 250 | flower_url_prefix = 251 | 252 | # This defines the port that Celery Flower runs on 253 | flower_port = 5555 254 | 255 | # Securing Flower with Basic Authentication 256 | # Accepts user:password pairs separated by a comma 257 | # Example: flower_basic_auth = user1:password1,user2:password2 258 | flower_basic_auth = 259 | 260 | # Default queue that tasks get assigned to and that worker listen on. 261 | default_queue = default 262 | 263 | # How many processes CeleryExecutor uses to sync task state. 264 | # 0 means to use max(1, number of cores - 1) processes. 265 | sync_parallelism = 0 266 | 267 | # Import path for celery configuration options 268 | celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG 269 | 270 | [celery_broker_transport_options] 271 | # The visibility timeout defines the number of seconds to wait for the worker 272 | # to acknowledge the task before the message is redelivered to another worker. 273 | # Make sure to increase the visibility timeout to match the time of the longest 274 | # ETA you're planning to use. 
Especially important in case of using Redis or SQS 275 | visibility_timeout = 21600 276 | 277 | # In case of using SSL 278 | ssl_active = False 279 | ssl_key = 280 | ssl_cert = 281 | ssl_cacert = 282 | 283 | [dask] 284 | # This section only applies if you are using the DaskExecutor in 285 | # [core] section above 286 | 287 | # The IP address and port of the Dask cluster's scheduler. 288 | cluster_address = 127.0.0.1:8786 289 | # TLS/ SSL settings to access a secured Dask scheduler. 290 | tls_ca = 291 | tls_cert = 292 | tls_key = 293 | 294 | [ldap] 295 | # set this to ldaps://<your.ldap.server>:<port> 296 | uri = 297 | user_filter = objectClass=* 298 | user_name_attr = uid 299 | group_member_attr = memberOf 300 | superuser_filter = 301 | data_profiler_filter = 302 | bind_user = cn=Manager,dc=example,dc=com 303 | bind_password = insecure 304 | basedn = dc=example,dc=com 305 | cacert = /etc/ca/ldap_ca.crt 306 | search_scope = LEVEL 307 | 308 | [kerberos] 309 | ccache = /tmp/airflow_krb5_ccache 310 | # gets augmented with fqdn 311 | principal = airflow 312 | reinit_frequency = 3600 313 | kinit_path = kinit 314 | keytab = airflow.keytab 315 | 316 | [cli] 317 | api_client = airflow.api.client.json_client 318 | endpoint_url = http://0.0.0.0:8080 319 | 320 | [api] 321 | auth_backend = airflow.api.auth.backend.default 322 | 323 | [github_enterprise] 324 | api_rev = v3 325 | 326 | [admin] 327 | # UI to hide sensitive variable fields when set to True 328 | hide_sensitive_variable_fields = True 329 | 330 | [elasticsearch] 331 | elasticsearch_host = 332 | -------------------------------------------------------------------------------- /scripts/kube/templates/configmaps.template.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | apiVersion: v1 18 | kind: ConfigMap 19 | metadata: 20 | name: airflow-configmap 21 | namespace: airflow 22 | data: 23 | airflow.cfg: | 24 | [core] 25 | dags_folder = {{CONFIGMAP_DAGS_FOLDER}} 26 | base_log_folder = /root/airflow/logs 27 | logging_level = INFO 28 | executor = KubernetesExecutor 29 | parallelism = 32 30 | load_examples = False 31 | plugins_folder = /root/airflow/plugins 32 | sql_alchemy_conn = $SQL_ALCHEMY_CONN 33 | 34 | [scheduler] 35 | dag_dir_list_interval = 300 36 | child_process_log_directory = /root/airflow/logs/scheduler 37 | # Task instances listen for external kill signal (when you clear tasks 38 | # from the CLI or the UI), this defines the frequency at which they should 39 | # listen (in seconds).
40 | job_heartbeat_sec = 5 41 | max_threads = 2 42 | 43 | # The scheduler constantly tries to trigger new tasks (look at the 44 | # scheduler section in the docs for more information). This defines 45 | # how often the scheduler should run (in seconds). 46 | scheduler_heartbeat_sec = 5 47 | 48 | # after how much time a new DAGs should be picked up from the filesystem 49 | min_file_process_interval = 0 50 | 51 | statsd_on = False 52 | statsd_host = localhost 53 | statsd_port = 8125 54 | statsd_prefix = airflow 55 | 56 | # How many seconds to wait between file-parsing loops to prevent the logs from being spammed. 57 | min_file_parsing_loop_time = 1 58 | 59 | print_stats_interval = 30 60 | scheduler_zombie_task_threshold = 300 61 | max_tis_per_query = 0 62 | authenticate = False 63 | 64 | # Turn off scheduler catchup by setting this to False. 65 | # Default behavior is unchanged and 66 | # Command Line Backfills still work, but the scheduler 67 | # will not do scheduler catchup if this is False, 68 | # however it can be set on a per DAG basis in the 69 | # DAG definition (catchup) 70 | catchup_by_default = True 71 | 72 | [webserver] 73 | # The base url of your website as airflow cannot guess what domain or 74 | # cname you are using. This is used in automated emails that 75 | # airflow sends to point links to the right web server 76 | base_url = http://0.0.0.0:8080 77 | rbac=True 78 | 79 | # The ip specified when starting the web server 80 | web_server_host = 0.0.0.0 81 | 82 | # The port on which to run the web server 83 | web_server_port = 8080 84 | 85 | # Paths to the SSL certificate and key for the web server. When both are 86 | # provided SSL will be enabled. This does not change the web server port. 87 | web_server_ssl_cert = 88 | web_server_ssl_key = 89 | 90 | # Number of seconds the webserver waits before killing gunicorn master that doesn't respond 91 | web_server_master_timeout = 120 92 | 93 | # Number of seconds the gunicorn webserver waits before timing out on a worker 94 | web_server_worker_timeout = 120 95 | 96 | # Number of workers to refresh at a time. When set to 0, worker refresh is 97 | # disabled. When nonzero, airflow periodically refreshes webserver workers by 98 | # bringing up new ones and killing old ones. 99 | worker_refresh_batch_size = 1 100 | 101 | # Number of seconds to wait before refreshing a batch of workers. 102 | worker_refresh_interval = 30 103 | 104 | # Secret key used to run your flask app 105 | secret_key = temporary_key 106 | 107 | # Number of workers to run the Gunicorn web server 108 | workers = 4 109 | 110 | # The worker class gunicorn should use. Choices include 111 | # sync (default), eventlet, gevent 112 | worker_class = sync 113 | 114 | # Log files for the gunicorn webserver. '-' means log to stderr. 115 | access_logfile = - 116 | error_logfile = - 117 | 118 | # Expose the configuration file in the web server 119 | expose_config = False 120 | 121 | # Default DAG view. Valid values are: 122 | # tree, graph, duration, gantt, landing_times 123 | dag_default_view = tree 124 | 125 | # Default DAG orientation. Valid values are: 126 | # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) 127 | dag_orientation = LR 128 | 129 | # Puts the webserver in demonstration mode; blurs the names of Operators for 130 | # privacy.
131 | demo_mode = False 132 | 133 | # The amount of time (in secs) webserver will wait for initial handshake 134 | # while fetching logs from other worker machine 135 | log_fetch_timeout_sec = 5 136 | 137 | # By default, the webserver shows paused DAGs. Flip this to hide paused 138 | # DAGs by default 139 | hide_paused_dags_by_default = False 140 | 141 | # Consistent page size across all listing views in the UI 142 | page_size = 100 143 | 144 | [smtp] 145 | # If you want airflow to send emails on retries, failure, and you want to use 146 | # the airflow.utils.email.send_email_smtp function, you have to configure an 147 | # smtp server here 148 | smtp_host = localhost 149 | smtp_starttls = True 150 | smtp_ssl = False 151 | # Uncomment and set the user/pass settings if you want to use SMTP AUTH 152 | # smtp_user = airflow 153 | # smtp_password = airflow 154 | smtp_port = 25 155 | smtp_mail_from = airflow@example.com 156 | 157 | [kubernetes] 158 | airflow_configmap = airflow-configmap 159 | worker_container_repository = {{AIRFLOW_IMAGE}} 160 | worker_container_tag = {{AIRFLOW_TAG}} 161 | worker_container_image_pull_policy = Always 162 | worker_service_account_name = airflow 163 | namespace = airflow 164 | delete_worker_pods = True 165 | dags_in_image = False 166 | git_repo = https://github.com/{{CONFIGMAP_GIT_REPO}}.git 167 | git_branch = {{CONFIGMAP_BRANCH}} 168 | git_subpath = airflow/contrib/example_dags/ 169 | git_user = 170 | git_password = 171 | git_sync_root = /git 172 | git_sync_path = repo 173 | git_dags_folder_mount_point = {{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}} 174 | dags_volume_claim = {{CONFIGMAP_DAGS_VOLUME_CLAIM}} 175 | dags_volume_subpath = 176 | logs_volume_claim = 177 | logs_volume_subpath = 178 | dags_volume_host = 179 | logs_volume_host = 180 | in_cluster = True 181 | gcp_service_account_keys = 182 | 183 | # Example affinity and toleration definitions. 184 | affinity = {"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"weight":1,"preference":{"matchExpressions":[{"key":"lifecycle","operator":"In","values":["Ec2Spot"]}]}}],"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}} 185 | tolerations = [{ "key": "spotInstance", "operator": "Equal", "value": "true", "effect": "PreferNoSchedule" },{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }] 186 | # affinity = {"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}} 187 | # tolerations = [{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }] 188 | 189 | # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync 190 | git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 191 | git_sync_container_tag = v2.0.5 192 | git_sync_init_container_name = git-sync-clone 193 | 194 | [kubernetes_node_selectors] 195 | # The Key-value pairs to be given to worker pods. 196 | # The worker pods will be scheduled to the nodes of the specified key-value pairs. 197 | # Should be supplied in the format: key = value 198 | 199 | [kubernetes_annotations] 200 | # The Key-value annotations pairs to be given to worker pods.
201 | # Should be supplied in the format: key = value 202 | 203 | [kubernetes_secrets] 204 | SQL_ALCHEMY_CONN = airflow-secrets=sql_alchemy_conn 205 | 206 | [hive] 207 | # Default mapreduce queue for HiveOperator tasks 208 | default_hive_mapred_queue = 209 | 210 | [celery] 211 | # This section only applies if you are using the CeleryExecutor in 212 | # [core] section above 213 | 214 | # The app name that will be used by celery 215 | celery_app_name = airflow.executors.celery_executor 216 | 217 | # The concurrency that will be used when starting workers with the 218 | # "airflow worker" command. This defines the number of task instances that 219 | # a worker will take, so size up your workers based on the resources on 220 | # your worker box and the nature of your tasks 221 | worker_concurrency = 16 222 | 223 | # When you start an airflow worker, airflow starts a tiny web server 224 | # subprocess to serve the workers local log files to the airflow main 225 | # web server, who then builds pages and sends them to users. This defines 226 | # the port on which the logs are served. It needs to be unused, and open 227 | # visible from the main web server to connect into the workers. 228 | worker_log_server_port = 8793 229 | 230 | # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally 231 | # a sqlalchemy database. Refer to the Celery documentation for more 232 | # information. 233 | # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings 234 | broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow 235 | 236 | # The Celery result_backend. When a job finishes, it needs to update the 237 | # metadata of the job. Therefore it will post a message on a message bus, 238 | # or insert it into a database (depending of the backend) 239 | # This status is used by the scheduler to update the state of the task 240 | # The use of a database is highly recommended 241 | # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings 242 | result_backend = db+mysql://airflow:airflow@localhost:3306/airflow 243 | 244 | # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start 245 | # it `airflow flower`. This defines the IP that Celery Flower runs on 246 | flower_host = 0.0.0.0 247 | 248 | # The root URL for Flower 249 | # Ex: flower_url_prefix = /flower 250 | flower_url_prefix = 251 | 252 | # This defines the port that Celery Flower runs on 253 | flower_port = 5555 254 | 255 | # Securing Flower with Basic Authentication 256 | # Accepts user:password pairs separated by a comma 257 | # Example: flower_basic_auth = user1:password1,user2:password2 258 | flower_basic_auth = 259 | 260 | # Default queue that tasks get assigned to and that worker listen on. 261 | default_queue = default 262 | 263 | # How many processes CeleryExecutor uses to sync task state. 264 | # 0 means to use max(1, number of cores - 1) processes. 265 | sync_parallelism = 0 266 | 267 | # Import path for celery configuration options 268 | celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG 269 | 270 | [celery_broker_transport_options] 271 | # The visibility timeout defines the number of seconds to wait for the worker 272 | # to acknowledge the task before the message is redelivered to another worker. 273 | # Make sure to increase the visibility timeout to match the time of the longest 274 | # ETA you're planning to use.
Especially important in case of using Redis or SQS 275 | visibility_timeout = 21600 276 | 277 | # In case of using SSL 278 | ssl_active = False 279 | ssl_key = 280 | ssl_cert = 281 | ssl_cacert = 282 | 283 | [dask] 284 | # This section only applies if you are using the DaskExecutor in 285 | # [core] section above 286 | 287 | # The IP address and port of the Dask cluster's scheduler. 288 | cluster_address = 127.0.0.1:8786 289 | # TLS/ SSL settings to access a secured Dask scheduler. 290 | tls_ca = 291 | tls_cert = 292 | tls_key = 293 | 294 | [ldap] 295 | # set this to ldaps://<your.ldap.server>:<port> 296 | uri = 297 | user_filter = objectClass=* 298 | user_name_attr = uid 299 | group_member_attr = memberOf 300 | superuser_filter = 301 | data_profiler_filter = 302 | bind_user = cn=Manager,dc=example,dc=com 303 | bind_password = insecure 304 | basedn = dc=example,dc=com 305 | cacert = /etc/ca/ldap_ca.crt 306 | search_scope = LEVEL 307 | 308 | [kerberos] 309 | ccache = /tmp/airflow_krb5_ccache 310 | # gets augmented with fqdn 311 | principal = airflow 312 | reinit_frequency = 3600 313 | kinit_path = kinit 314 | keytab = airflow.keytab 315 | 316 | [cli] 317 | api_client = airflow.api.client.json_client 318 | endpoint_url = http://0.0.0.0:8080 319 | 320 | [api] 321 | auth_backend = airflow.api.auth.backend.default 322 | 323 | [github_enterprise] 324 | api_rev = v3 325 | 326 | [admin] 327 | # UI to hide sensitive variable fields when set to True 328 | hide_sensitive_variable_fields = True 329 | 330 | [elasticsearch] 331 | elasticsearch_host = 332 | --------------------------------------------------------------------------------
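
The ConfigMap above is a template: the {{...}} tokens ({{AIRFLOW_IMAGE}}, {{AIRFLOW_TAG}}, {{CONFIGMAP_GIT_REPO}}, {{CONFIGMAP_BRANCH}}, {{CONFIGMAP_DAGS_FOLDER}}, {{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}, {{CONFIGMAP_DAGS_VOLUME_CLAIM}}) must be substituted before the manifest can be applied, which is how the rendered copy of airflow.cfg earlier in this listing (with a concrete ECR image and the airflow-efs-pvc claim) was produced. The snippet below is only a minimal, hypothetical sketch of that rendering step using sed; the output path and example values are assumptions rather than what this repository's own deployment script does, so adjust them to your environment. Note that $SQL_ALCHEMY_CONN is deliberately left alone: it is resolved at runtime from the airflow-secrets Secret via the [kubernetes_secrets] section.

```bash
#!/usr/bin/env bash
# Hypothetical sketch: render configmaps.template.yaml into an applyable manifest.
# Paths and values below are examples only; replace them with your own settings.
set -euo pipefail

TEMPLATE=scripts/kube/templates/configmaps.template.yaml
OUTPUT=/tmp/airflow-configmap.yaml   # example output location, not a repo path

# Example values -- swap in your own image, repository, branch and volume claim.
AIRFLOW_IMAGE="123456789012.dkr.ecr.us-west-2.amazonaws.com/airflow-eks-demo"
AIRFLOW_TAG="latest"
CONFIGMAP_GIT_REPO="apache/airflow"
CONFIGMAP_BRANCH="master"
CONFIGMAP_DAGS_FOLDER="/root/airflow/dags"
CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT=""
CONFIGMAP_DAGS_VOLUME_CLAIM="airflow-efs-pvc"

# Replace each {{PLACEHOLDER}} with the shell variable of the same name.
sed -e "s|{{AIRFLOW_IMAGE}}|${AIRFLOW_IMAGE}|g" \
    -e "s|{{AIRFLOW_TAG}}|${AIRFLOW_TAG}|g" \
    -e "s|{{CONFIGMAP_GIT_REPO}}|${CONFIGMAP_GIT_REPO}|g" \
    -e "s|{{CONFIGMAP_BRANCH}}|${CONFIGMAP_BRANCH}|g" \
    -e "s|{{CONFIGMAP_DAGS_FOLDER}}|${CONFIGMAP_DAGS_FOLDER}|g" \
    -e "s|{{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}|${CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}|g" \
    -e "s|{{CONFIGMAP_DAGS_VOLUME_CLAIM}}|${CONFIGMAP_DAGS_VOLUME_CLAIM}|g" \
    "$TEMPLATE" > "$OUTPUT"

# Apply the rendered ConfigMap into the airflow namespace
# (assumes kubectl is pointed at the target EKS cluster).
kubectl apply -f "$OUTPUT"
```

Keeping the substitution to non-sensitive values means no secret material is baked into the rendered manifest; the database connection string stays in the airflow-secrets Secret and is injected into worker pods by the KubernetesExecutor.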