├── k8s ├── scripts │ ├── requirements.txt │ └── build_lambda_package.sh ├── charts │ ├── airflow │ │ ├── ci │ │ │ ├── default-values.yaml │ │ │ └── legacy-values.yaml │ │ ├── templates │ │ │ ├── extra-manifests.yaml │ │ │ ├── sync │ │ │ │ ├── sync-pools-secret.yaml │ │ │ │ ├── sync-users-secret.yaml │ │ │ │ ├── sync-variables-secret.yaml │ │ │ │ ├── sync-connections-secret.yaml │ │ │ │ ├── _helpers │ │ │ │ │ ├── sync_variables.tpl │ │ │ │ │ ├── sync_pools.tpl │ │ │ │ │ ├── global_code.tpl │ │ │ │ │ ├── sync_users.tpl │ │ │ │ │ └── sync_connections.tpl │ │ │ │ ├── sync-pools-job.yaml │ │ │ │ ├── sync-pools-deployment.yaml │ │ │ │ ├── sync-users-job.yaml │ │ │ │ ├── sync-variables-job.yaml │ │ │ │ ├── sync-connections-job.yaml │ │ │ │ ├── sync-users-deployment.yaml │ │ │ │ ├── sync-variables-deployment.yaml │ │ │ │ └── sync-connections-deployment.yaml │ │ │ ├── config │ │ │ │ ├── secret-known-hosts.yaml │ │ │ │ ├── secret-local-settings.yaml │ │ │ │ ├── secret-webserver-config.yaml │ │ │ │ └── configmap-pod-template.yaml │ │ │ ├── db-migrations │ │ │ │ ├── db-migrations-secret.yaml │ │ │ │ ├── _helpers │ │ │ │ │ └── code.tpl │ │ │ │ ├── db-migrations-job.yaml │ │ │ │ └── db-migrations-deployment.yaml │ │ │ ├── rbac │ │ │ │ ├── airflow-serviceaccount.yaml │ │ │ │ ├── airflow-rolebinding.yaml │ │ │ │ └── airflow-role.yaml │ │ │ ├── pgbouncer │ │ │ │ ├── pgbouncer-secret.yaml │ │ │ │ ├── pgbouncer-service.yaml │ │ │ │ ├── pgbouncer-secret-certs.yaml │ │ │ │ ├── pgbouncer-pdb.yaml │ │ │ │ └── _helpers │ │ │ │ │ └── pgbouncer.tpl │ │ │ ├── webserver │ │ │ │ ├── webserver-prometheus-rule.yaml │ │ │ │ ├── webserver-service-monitor.yaml │ │ │ │ ├── webserver-pdb.yaml │ │ │ │ ├── webserver-service.yaml │ │ │ │ ├── webserver-ingress-v1beta1.yaml │ │ │ │ ├── webserver-ingress.yaml │ │ │ │ └── webserver-deployment.yaml │ │ │ ├── worker │ │ │ │ ├── worker-service.yaml │ │ │ │ ├── worker-hpa.yaml │ │ │ │ ├── worker-pdb.yaml │ │ │ │ └── worker-statefulset.yaml │ │ │ ├── 
pvc-dags.yaml │ │ │ ├── pvc-logs.yaml │ │ │ ├── flower │ │ │ │ ├── flower-pdb.yaml │ │ │ │ ├── flower-service.yaml │ │ │ │ ├── flower-ingress-v1beta1.yaml │ │ │ │ ├── flower-ingress.yaml │ │ │ │ └── flower-deployment.yaml │ │ │ ├── scheduler │ │ │ │ ├── scheduler-pdb.yaml │ │ │ │ └── scheduler-deployment.yaml │ │ │ └── _helpers │ │ │ │ └── common.tpl │ │ ├── files │ │ │ ├── webserver_config.py │ │ │ └── pod_template.kubernetes-helm-yaml │ │ ├── Chart.lock │ │ └── Chart.yaml │ └── argocd │ │ ├── namespace.yaml │ │ └── auth.yaml ├── resources │ ├── key-pair.tf │ ├── backend.tf │ ├── variables.tf │ ├── emr-codes-bucket.tf │ ├── versions.tf │ ├── output.tf │ ├── sns.tf │ ├── vpc.tf │ ├── provider.tf │ ├── sg.tf │ ├── rds.tf │ ├── eks.tf │ ├── lambda.tf │ ├── glue-crawler.tf │ ├── apps.tf │ └── cluster-redshift.tf ├── airflow_access_git_repo │ └── ssh.yaml ├── apps │ └── airflow-app.yaml ├── secrets │ └── keys.yml ├── lambda-function │ └── lambda_function.py └── codes │ ├── transformation.py │ ├── variables.py │ └── csv-to-parquet.py ├── connect.txt ├── .gitignore └── .github └── workflows ├── verify.yml ├── deploy.yml └── destroy.yml /k8s/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.26.0 -------------------------------------------------------------------------------- /k8s/charts/airflow/ci/default-values.yaml: -------------------------------------------------------------------------------- 1 | ## this values file tests the defaults 2 | ## 3 | -------------------------------------------------------------------------------- /connect.txt: -------------------------------------------------------------------------------- 1 | aws emr create-default-roles 2 | 3 | aws eks --region us-east-1 update-kubeconfig --name vini-cluster-eks -------------------------------------------------------------------------------- /k8s/resources/key-pair.tf: 
-------------------------------------------------------------------------------- 1 | resource "aws_key_pair" "my-key" { 2 | key_name = "my-key" 3 | public_key = "YOUR-PUBLIC-KEY" 4 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/extra-manifests.yaml: -------------------------------------------------------------------------------- 1 | {{- range .Values.extraManifests }} 2 | --- 3 | {{ tpl (toYaml .) $ }} 4 | {{- end }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | aws-key.txt 2 | *.terraform 3 | *.terraform.* 4 | k8s/resources/kubeconfig_eks-cluster-vini-campos-etl-aws 5 | aws_key.json 6 | connect.txt 7 | key-emr.txt 8 | __pycache__ 9 | t.py -------------------------------------------------------------------------------- /k8s/resources/backend.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "s3" { 3 | bucket = "tfstate-vini-campos-etl-aws-poc" 4 | key = "terraform/tfstate" 5 | region = "us-east-1" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /k8s/charts/argocd/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: argocd 5 | 6 | --- 7 | 8 | apiVersion: v1 9 | kind: Namespace 10 | metadata: 11 | name: airflow -------------------------------------------------------------------------------- /k8s/charts/airflow/ci/legacy-values.yaml: -------------------------------------------------------------------------------- 1 | ## this values file tests legacy airflow 2 | ## 3 | airflow: 4 | legacyCommands: true 5 | image: 6 | repository: apache/airflow 7 | tag: 1.10.12-python3.8 8 | -------------------------------------------------------------------------------- 
/k8s/airflow_access_git_repo/ssh.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: airflow-http-git-secret 5 | namespace: airflow 6 | type: Opaque 7 | data: 8 | username: your-username-with-base64 # you can use => echo -n "username" | base64 9 | stringData: 10 | password: your-github-token -------------------------------------------------------------------------------- /k8s/charts/airflow/files/webserver_config.py: -------------------------------------------------------------------------------- 1 | from airflow import configuration as conf 2 | from flask_appbuilder.security.manager import AUTH_DB 3 | 4 | # the SQLAlchemy connection string 5 | SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN') 6 | 7 | # use embedded DB for auth 8 | AUTH_TYPE = AUTH_DB 9 | -------------------------------------------------------------------------------- /k8s/charts/argocd/auth.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: private-bgd-repo 5 | namespace: argocd # must be the namespace Argo CD runs in; repository Secrets are only discovered there 6 | labels: 7 | argocd.argoproj.io/secret-type: repository 8 | stringData: 9 | url: https://github.com/your/private-repo.git 10 | password: your-github-token 11 | username: your-username -------------------------------------------------------------------------------- /k8s/charts/airflow/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: postgresql 3 | repository: https://charts.helm.sh/stable 4 | version: 8.6.4 5 | - name: redis 6 | repository: https://charts.helm.sh/stable 7 | version: 10.5.7 8 | digest: sha256:bfe30fcaf72a0609856bf1a5dab2b941e8ef89b30ba935aa820743a6758f4fc4 9 | generated: "2021-07-09T14:11:48.265069+10:00" 10 | --------------------------------------------------------------------------------
/k8s/charts/airflow/templates/sync/sync-pools-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.airflow.pools }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-sync-pools 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: sync-pools 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | sync_pools.py: {{ include "airflow.sync.sync_pools.py" . | b64enc | quote }} 14 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-users-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.airflow.users }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-sync-users 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: sync-users 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | sync_users.py: {{ include "airflow.sync.sync_users.py" . | b64enc | quote }} 14 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/config/secret-known-hosts.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.dags.gitSync.enabled) (.Values.dags.gitSync.sshKnownHosts) }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-known-hosts 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . 
}} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | data: 12 | known_hosts: {{ .Values.dags.gitSync.sshKnownHosts | b64enc | quote }} 13 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-variables-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.airflow.variables }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-sync-variables 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: sync-variables 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | sync_variables.py: {{ include "airflow.sync.sync_variables.py" . | b64enc | quote }} 14 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-connections-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.airflow.connections }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-sync-connections 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: sync-connections 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | sync_connections.py: {{ include "airflow.sync.sync_connections.py" . | b64enc | quote }} 14 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/db-migrations/db-migrations-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.airflow.dbMigrations.enabled }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . 
}}-db-migrations 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: db-migrations 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | db_migrations.py: {{ include "airflow.db_migrations.db_migrations.py" . | b64enc | quote }} 14 | {{- end }} -------------------------------------------------------------------------------- /k8s/apps/airflow-app.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Application 3 | metadata: 4 | name: airflow 5 | namespace: argocd 6 | finalizers: 7 | - resources-finalizer.argocd.argoproj.io 8 | spec: 9 | project: default 10 | source: 11 | repoURL: https://github.com/your/private-repo.git 12 | targetRevision: main 13 | path: k8s/charts/airflow 14 | destination: 15 | server: https://kubernetes.default.svc 16 | namespace: airflow 17 | syncPolicy: 18 | automated: 19 | selfHeal: true 20 | syncOptions: 21 | - CreateNamespace=false -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/rbac/airflow-serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create }} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "airflow.serviceAccountName" . }} 6 | {{- if .Values.serviceAccount.annotations }} 7 | annotations: 8 | {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} 9 | {{- end }} 10 | labels: 11 | app: {{ include "airflow.labels.app" . }} 12 | chart: {{ include "airflow.labels.chart" . 
}} 13 | release: {{ .Release.Name }} 14 | heritage: {{ .Release.Service }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/config/secret-local-settings.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.localSettings.stringOverride) (not .Values.airflow.localSettings.existingSecret) }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-local-settings 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | data: 12 | airflow_local_settings.py: {{ .Values.airflow.localSettings.stringOverride | b64enc | quote }} 13 | {{- end }} -------------------------------------------------------------------------------- /k8s/resources/variables.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | default = "us-east-1" 3 | } 4 | 5 | variable "cluster_name" { 6 | default = "vini-cluster-eks" 7 | } 8 | 9 | variable "redshift_user" { 10 | default = "your-redshift-user" 11 | } 12 | variable "redshift_pass" { 13 | default = "your-redshift-password" 14 | sensitive = true # redact the password from plan/apply output and CI logs 15 | } 16 | 17 | variable "redshift_db" { 18 | default = "etlvini" 19 | } 20 | 21 | variable "postgres_user" { 22 | default = "your-postgres-user" 23 | } 24 | 25 | variable "postgres_pass" { 26 | default = "your-postgres-password" 27 | sensitive = true # redact the password from plan/apply output and CI logs 28 | } 29 | 30 | variable "email" { 31 | default = "your-email" 32 | } -------------------------------------------------------------------------------- /k8s/resources/emr-codes-bucket.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "emr_codes_bucket" { 2 | bucket = "emr-code-zone-vini-etl-aws" 3 | force_destroy = true 4 | } 5 | 6 | resource "aws_s3_bucket" "athena-results" {
7 | bucket = "athena-results-vini-etl-aws" 8 | force_destroy = true 9 | } 10 | 11 | resource "aws_s3_bucket_object" "codes_object" { 12 | for_each = fileset("../codes/", "*") 13 | 14 | bucket = aws_s3_bucket.emr_codes_bucket.id 15 | key = each.key 16 | source = "../codes/${each.key}" 17 | force_destroy = true 18 | 19 | depends_on = [aws_s3_bucket.emr_codes_bucket] 20 | } -------------------------------------------------------------------------------- /k8s/resources/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | 3 | required_providers { 4 | aws = { 5 | source = "hashicorp/aws" 6 | version = "3.73.0" 7 | } 8 | 9 | random = { 10 | source = "hashicorp/random" 11 | version = "3.1.0" 12 | } 13 | 14 | local = { 15 | source = "hashicorp/local" 16 | version = "2.1.0" 17 | } 18 | 19 | null = { 20 | source = "hashicorp/null" 21 | version = "3.1.0" 22 | } 23 | 24 | kubectl = { 25 | source = "gavinbunney/kubectl" 26 | version = ">= 1.7.0" 27 | } 28 | 29 | } 30 | 31 | required_version = ">= 0.14" 32 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pgbouncer/pgbouncer-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if include "airflow.pgbouncer.should_use" . }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-pgbouncer 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: pgbouncer 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | data: 13 | pgbouncer.ini: {{ include "airflow.pgbouncer.pgbouncer.ini" . | b64enc | quote }} 14 | gen_auth_file.sh: {{ include "airflow.pgbouncer.gen_auth_file.sh" . 
| b64enc | quote }} 15 | {{- end }} -------------------------------------------------------------------------------- /k8s/resources/output.tf: -------------------------------------------------------------------------------- 1 | output "cluster_id" { 2 | description = "EKS cluster ID." 3 | value = module.eks.cluster_id 4 | } 5 | 6 | output "cluster_endpoint" { 7 | description = "Endpoint for EKS control plane." 8 | value = module.eks.cluster_endpoint 9 | } 10 | 11 | output "cluster_security_group_id" { 12 | description = "Security group ids attached to the cluster control plane." 13 | value = module.eks.cluster_security_group_id 14 | } 15 | 16 | output "region" { 17 | description = "AWS region" 18 | value = var.region 19 | } 20 | 21 | output "cluster_name" { 22 | description = "Kubernetes Cluster Name" 23 | value = var.cluster_name 24 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/rbac/airflow-rolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: RoleBinding 4 | metadata: 5 | name: {{ include "airflow.fullname" . }} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | roleRef: 12 | apiGroup: rbac.authorization.k8s.io 13 | kind: Role 14 | name: {{ include "airflow.fullname" . }} 15 | subjects: 16 | - kind: ServiceAccount 17 | name: {{ include "airflow.serviceAccountName" . }} 18 | namespace: {{ .Release.Namespace }} 19 | {{- end }} 20 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pgbouncer/pgbouncer-service.yaml: -------------------------------------------------------------------------------- 1 | {{- if include "airflow.pgbouncer.should_use" . 
}} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-pgbouncer 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: pgbouncer 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | type: ClusterIP 14 | selector: 15 | app: {{ include "airflow.labels.app" . }} 16 | component: pgbouncer 17 | release: {{ .Release.Name }} 18 | ports: 19 | - name: pgbouncer 20 | protocol: TCP 21 | port: 6432 22 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/webserver/webserver-prometheus-rule.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.prometheusRule.enabled }} 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: PrometheusRule 4 | metadata: 5 | name: {{ include "airflow.fullname" . }} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: web 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | {{- if .Values.prometheusRule.additionalLabels }} 13 | {{- toYaml .Values.prometheusRule.additionalLabels | nindent 4 }} 14 | {{- end }} 15 | spec: 16 | groups: 17 | {{- toYaml .Values.prometheusRule.groups | nindent 4 }} 18 | {{- end }} 19 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/config/secret-webserver-config.yaml: -------------------------------------------------------------------------------- 1 | {{- if not .Values.web.webserverConfig.existingSecret }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-webserver-config 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . 
}} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | data: 12 | {{- if .Values.web.webserverConfig.stringOverride }} 13 | webserver_config.py: {{ .Values.web.webserverConfig.stringOverride | b64enc | quote }} 14 | {{- else }} 15 | webserver_config.py: {{ tpl (.Files.Get "files/webserver_config.py") . | b64enc | quote }} 16 | {{- end }} 17 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/config/configmap-pod-template.yaml: -------------------------------------------------------------------------------- 1 | {{- if include "airflow.executor.kubernetes_like" . }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-pod-template 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | data: 12 | pod_template.yaml: |- 13 | {{- if .Values.airflow.kubernetesPodTemplate.stringOverride }} 14 | {{- .Values.airflow.kubernetesPodTemplate.stringOverride | nindent 4 }} 15 | {{- else }} 16 | {{- tpl (.Files.Get "files/pod_template.kubernetes-helm-yaml") . 
| nindent 4 }} 17 | {{- end }} 18 | {{- end }} -------------------------------------------------------------------------------- /k8s/resources/sns.tf: -------------------------------------------------------------------------------- 1 | resource "aws_sns_topic" "mysns" { 2 | name = "send-email" 3 | } 4 | 5 | resource "aws_sns_topic_subscription" "send-email" { 6 | topic_arn = aws_sns_topic.mysns.arn 7 | protocol = "email" 8 | endpoint = var.email 9 | 10 | depends_on = [ 11 | aws_sns_topic.mysns 12 | ] 13 | } 14 | 15 | data "aws_iam_policy_document" "sns_topic_policy" { 16 | policy_id = "__default_policy_ID" 17 | 18 | statement { 19 | actions = [ 20 | "SNS:Publish" 21 | ] 22 | 23 | effect = "Allow" 24 | 25 | principals { 26 | type = "AWS" 27 | identifiers = ["*"] 28 | } 29 | 30 | resources = [ 31 | aws_sns_topic.mysns.arn, 32 | ] 33 | 34 | sid = "__default_statement_ID" 35 | } 36 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/worker/worker-service.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.workers.enabled }} 2 | apiVersion: v1 3 | ## this Service gives stable DNS entries for workers, used by webserver for logs 4 | kind: Service 5 | metadata: 6 | name: {{ include "airflow.fullname" . }}-worker 7 | labels: 8 | app: {{ include "airflow.labels.app" . }} 9 | component: worker 10 | chart: {{ include "airflow.labels.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | spec: 14 | ports: 15 | - name: worker 16 | protocol: TCP 17 | port: 8793 18 | clusterIP: None 19 | selector: 20 | app: {{ include "airflow.labels.app" . 
}} 21 | component: worker 22 | release: {{ .Release.Name }} 23 | {{- end }} 24 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/worker/worker-hpa.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.workers.enabled) (.Values.workers.autoscaling.enabled) }} 2 | apiVersion: autoscaling/v2beta2 3 | kind: HorizontalPodAutoscaler 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-worker 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: worker 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | scaleTargetRef: 14 | apiVersion: apps/v1 15 | kind: StatefulSet 16 | name: {{ include "airflow.fullname" . }}-worker 17 | minReplicas: {{ .Values.workers.replicas }} 18 | maxReplicas: {{ .Values.workers.autoscaling.maxReplicas }} 19 | metrics: 20 | {{- toYaml .Values.workers.autoscaling.metrics | nindent 4 }} 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /k8s/charts/airflow/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | description: Airflow Helm Chart (User Community) - used to deploy Apache Airflow on Kubernetes 3 | name: airflow 4 | version: 8.5.2 5 | appVersion: 2.1.2 6 | icon: https://airflow.apache.org/_images/pin_large.png 7 | home: https://github.com/airflow-helm/charts 8 | maintainers: 9 | - name: thesuperzapper 10 | url: https://github.com/thesuperzapper 11 | - name: gsemet 12 | url: https://github.com/gsemet 13 | sources: 14 | - https://github.com/airflow-helm/charts/tree/main/charts/airflow 15 | keywords: 16 | - airflow 17 | - dag 18 | - workflow 19 | dependencies: 20 | - name: postgresql 21 | version: 8.6.4 22 | repository: https://charts.helm.sh/stable 23 | condition: postgresql.enabled 24 | - name: redis 25 | 
version: 10.5.7 26 | repository: https://charts.helm.sh/stable 27 | condition: redis.enabled 28 | -------------------------------------------------------------------------------- /k8s/secrets/keys.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: aws-credentials 5 | namespace: airflow 6 | type: Opaque 7 | data: 8 | aws_access_key_id: your-aws_access_key_id-base64 9 | aws_secret_access_key: your-aws_secret_access_key-base64 10 | 11 | --- 12 | 13 | apiVersion: v1 14 | kind: Secret 15 | metadata: 16 | name: postgres-credentials 17 | namespace: airflow 18 | type: Opaque 19 | data: 20 | postgres_password: your-postgres_password-base64 21 | 22 | --- 23 | 24 | apiVersion: v1 25 | kind: Secret 26 | metadata: 27 | name: redshift-credentials 28 | namespace: airflow 29 | type: Opaque 30 | data: 31 | redshift_password: your-redshift_password-base64 32 | 33 | --- 34 | 35 | apiVersion: v1 36 | kind: Secret 37 | metadata: 38 | name: token-credential 39 | namespace: airflow 40 | type: Opaque 41 | data: 42 | token: your-github-token-base64 -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/webserver/webserver-service-monitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceMonitor.enabled }} 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | name: {{ include "airflow.fullname" . }} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: web 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | {{- if .Values.serviceMonitor.selector }} 13 | {{- toYaml .Values.serviceMonitor.selector | nindent 4 }} 14 | {{- end }} 15 | spec: 16 | selector: 17 | matchLabels: 18 | app: {{ include "airflow.labels.app" . 
}} 19 | component: web 20 | release: {{ .Release.Name }} 21 | endpoints: 22 | - port: web 23 | path: {{ .Values.serviceMonitor.path }} 24 | interval: {{ .Values.serviceMonitor.interval }} 25 | {{- end }} 26 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/webserver/webserver-pdb.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.web.podDisruptionBudget.enabled }} 2 | apiVersion: policy/v1beta1 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-web 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: web 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | {{- if .Values.web.podDisruptionBudget.maxUnavailable }} 14 | maxUnavailable: {{ .Values.web.podDisruptionBudget.maxUnavailable }} 15 | {{- end }} 16 | {{- if .Values.web.podDisruptionBudget.minAvailable }} 17 | minAvailable: {{ .Values.web.podDisruptionBudget.minAvailable }} 18 | {{- end }} 19 | selector: 20 | matchLabels: 21 | app: {{ include "airflow.labels.app" . 
}} 22 | component: web 23 | release: {{ .Release.Name }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/resources/vpc.tf: -------------------------------------------------------------------------------- 1 | module "vpc" { 2 | source = "terraform-aws-modules/vpc/aws" 3 | version = "3.2.0" 4 | 5 | name = "eks-vpc" 6 | cidr = "10.0.0.0/16" 7 | azs = ["${var.region}a", "${var.region}b"] 8 | private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] 9 | public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] 10 | enable_nat_gateway = true 11 | single_nat_gateway = true 12 | enable_dns_hostnames = true 13 | 14 | tags = { 15 | "kubernetes.io/cluster/${var.cluster_name}" = "shared" 16 | } 17 | 18 | public_subnet_tags = { 19 | "kubernetes.io/cluster/${var.cluster_name}" = "shared" 20 | "kubernetes.io/role/elb" = "1" 21 | } 22 | 23 | private_subnet_tags = { 24 | "kubernetes.io/cluster/${var.cluster_name}" = "shared" 25 | "kubernetes.io/role/internal-elb" = "1" 26 | } 27 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pvc-dags.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.dags.persistence.enabled) (not .Values.dags.persistence.existingClaim) }} 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: {{ printf "%s-dags" (include "airflow.fullname" . | trunc 58) }} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . 
}} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | spec: 12 | accessModes: 13 | - {{ .Values.dags.persistence.accessMode | quote }} 14 | resources: 15 | requests: 16 | storage: {{ .Values.dags.persistence.size | quote }} 17 | {{- if .Values.dags.persistence.storageClass }} 18 | {{- if (eq "-" .Values.dags.persistence.storageClass) }} 19 | storageClassName: "" 20 | {{- else }} 21 | storageClassName: "{{ .Values.dags.persistence.storageClass }}" 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pvc-logs.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.logs.persistence.enabled) (not .Values.logs.persistence.existingClaim) }} 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: {{ printf "%s-logs" (include "airflow.fullname" . | trunc 58) }} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | spec: 12 | accessModes: 13 | - {{ .Values.logs.persistence.accessMode | quote }} 14 | resources: 15 | requests: 16 | storage: {{ .Values.logs.persistence.size | quote }} 17 | {{- if .Values.logs.persistence.storageClass }} 18 | {{- if (eq "-" .Values.logs.persistence.storageClass) }} 19 | storageClassName: "" 20 | {{- else }} 21 | storageClassName: "{{ .Values.logs.persistence.storageClass }}" 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/rbac/airflow-role.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: {{ include "airflow.fullname" . 
}} 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | chart: {{ include "airflow.labels.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | rules: 12 | {{- if .Values.rbac.events }} 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - events 17 | verbs: 18 | - "get" 19 | - "list" 20 | {{- end }} 21 | - apiGroups: 22 | - "" 23 | resources: 24 | - pods 25 | verbs: 26 | - "create" 27 | - "get" 28 | - "delete" 29 | - "list" 30 | - "patch" 31 | - "watch" 32 | - apiGroups: 33 | - "" 34 | resources: 35 | - "pods/log" 36 | verbs: 37 | - "get" 38 | - "list" 39 | - apiGroups: 40 | - "" 41 | resources: 42 | - "pods/exec" 43 | verbs: 44 | - "create" 45 | - "get" 46 | {{- end }} 47 | -------------------------------------------------------------------------------- /k8s/resources/provider.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.region 3 | } 4 | 5 | provider "kubernetes" { 6 | host = data.aws_eks_cluster.cluster.endpoint 7 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data) 8 | token = data.aws_eks_cluster_auth.cluster.token 9 | } 10 | 11 | provider "helm" { 12 | kubernetes { 13 | host = data.aws_eks_cluster.cluster.endpoint 14 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data) 15 | token = data.aws_eks_cluster_auth.cluster.token 16 | } 17 | } 18 | 19 | provider "kubectl" { 20 | host = data.aws_eks_cluster.cluster.endpoint 21 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data) 22 | token = data.aws_eks_cluster_auth.cluster.token 23 | load_config_file = false 24 | } -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/flower/flower-pdb.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.flower.enabled) 
(.Values.flower.podDisruptionBudget.enabled) }} 2 | apiVersion: policy/v1beta1 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-flower 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: flower 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | {{- if .Values.flower.podDisruptionBudget.maxUnavailable }} 14 | maxUnavailable: {{ .Values.flower.podDisruptionBudget.maxUnavailable }} 15 | {{- end }} 16 | {{- if .Values.flower.podDisruptionBudget.minAvailable }} 17 | minAvailable: {{ .Values.flower.podDisruptionBudget.minAvailable }} 18 | {{- end }} 19 | selector: 20 | matchLabels: 21 | app: {{ include "airflow.labels.app" . }} 22 | component: flower 23 | release: {{ .Release.Name }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/scheduler/scheduler-pdb.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.scheduler.podDisruptionBudget.enabled }} 2 | apiVersion: policy/v1beta1 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-scheduler 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: scheduler 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | {{- if .Values.scheduler.podDisruptionBudget.maxUnavailable }} 14 | maxUnavailable: {{ .Values.scheduler.podDisruptionBudget.maxUnavailable }} 15 | {{- end }} 16 | {{- if .Values.scheduler.podDisruptionBudget.minAvailable }} 17 | minAvailable: {{ .Values.scheduler.podDisruptionBudget.minAvailable }} 18 | {{- end }} 19 | selector: 20 | matchLabels: 21 | app: {{ include "airflow.labels.app" . 
}} 22 | component: scheduler 23 | release: {{ .Release.Name }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/worker/worker-pdb.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.workers.enabled) (.Values.workers.podDisruptionBudget.enabled) }} 2 | apiVersion: policy/v1beta1 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-worker 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: worker 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | {{- if .Values.workers.podDisruptionBudget.maxUnavailable }} 14 | maxUnavailable: {{ .Values.workers.podDisruptionBudget.maxUnavailable }} 15 | {{- end }} 16 | {{- if .Values.workers.podDisruptionBudget.minAvailable }} 17 | minAvailable: {{ .Values.workers.podDisruptionBudget.minAvailable }} 18 | {{- end }} 19 | selector: 20 | matchLabels: 21 | app: {{ include "airflow.labels.app" . }} 22 | component: worker 23 | release: {{ .Release.Name }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pgbouncer/pgbouncer-secret-certs.yaml: -------------------------------------------------------------------------------- 1 | {{- $self_signed_needed := or (not .Values.pgbouncer.clientSSL.keyFile.existingSecret) (not .Values.pgbouncer.clientSSL.certFile.existingSecret) }} 2 | {{- if and (include "airflow.pgbouncer.should_use" .) ($self_signed_needed) }} 3 | {{- $client_cert := genSelfSignedCert "localhost" nil nil 365 }} 4 | apiVersion: v1 5 | kind: Secret 6 | metadata: 7 | name: {{ include "airflow.fullname" . }}-pgbouncer-certs 8 | labels: 9 | app: {{ include "airflow.labels.app" . }} 10 | component: pgbouncer 11 | chart: {{ include "airflow.labels.chart" . 
}} 12 | release: {{ .Release.Name }} 13 | heritage: {{ .Release.Service }} 14 | data: 15 | {{- if not .Values.pgbouncer.clientSSL.keyFile.existingSecret }} 16 | client.key: {{ $client_cert.Key | b64enc | quote }} 17 | {{- end }} 18 | {{- if not .Values.pgbouncer.clientSSL.certFile.existingSecret }} 19 | client.crt: {{ $client_cert.Cert | b64enc | quote }} 20 | {{- end }} 21 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/pgbouncer/pgbouncer-pdb.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (include "airflow.pgbouncer.should_use" .) (.Values.pgbouncer.podDisruptionBudget.enabled) }} 2 | apiVersion: policy/v1beta1 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: {{ include "airflow.fullname" . }}-pgbouncer 6 | labels: 7 | app: {{ include "airflow.labels.app" . }} 8 | component: pgbouncer 9 | chart: {{ include "airflow.labels.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | {{- if .Values.pgbouncer.podDisruptionBudget.maxUnavailable }} 14 | maxUnavailable: {{ .Values.pgbouncer.podDisruptionBudget.maxUnavailable }} 15 | {{- end }} 16 | {{- if .Values.pgbouncer.podDisruptionBudget.minAvailable }} 17 | minAvailable: {{ .Values.pgbouncer.podDisruptionBudget.minAvailable }} 18 | {{- end }} 19 | selector: 20 | matchLabels: 21 | app: {{ include "airflow.labels.app" . 
}} 22 | component: pgbouncer 23 | release: {{ .Release.Name }} 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /k8s/resources/sg.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "worker_group_mgmt_one" { 2 | name_prefix = "worker_group_mgmt_one" 3 | vpc_id = module.vpc.vpc_id 4 | 5 | ingress { 6 | from_port = 22 7 | to_port = 22 8 | protocol = "tcp" 9 | 10 | cidr_blocks = [ 11 | "10.0.0.0/8", 12 | ] 13 | } 14 | } 15 | 16 | resource "aws_security_group" "worker_group_mgmt_two" { 17 | name_prefix = "worker_group_mgmt_two" 18 | vpc_id = module.vpc.vpc_id 19 | 20 | ingress { 21 | from_port = 22 22 | to_port = 22 23 | protocol = "tcp" 24 | 25 | cidr_blocks = [ 26 | "192.168.0.0/16", 27 | ] 28 | } 29 | } 30 | 31 | resource "aws_security_group" "all_worker_mgmt" { 32 | name_prefix = "all_worker_management" 33 | vpc_id = module.vpc.vpc_id 34 | 35 | ingress { 36 | from_port = 22 37 | to_port = 22 38 | protocol = "tcp" 39 | 40 | cidr_blocks = [ 41 | "10.0.0.0/8", 42 | "172.16.0.0/12", 43 | "192.168.0.0/16", 44 | ] 45 | } 46 | } -------------------------------------------------------------------------------- /k8s/resources/rds.tf: -------------------------------------------------------------------------------- 1 | resource "aws_db_instance" "vinipostgresql-instance" { 2 | identifier = "vinipostgresql-instance" 3 | name = "vinipostgresql" 4 | instance_class = "db.t2.micro" 5 | allocated_storage = 5 6 | engine = "postgres" 7 | engine_version = "12.7" 8 | skip_final_snapshot = true 9 | publicly_accessible = true 10 | vpc_security_group_ids = [aws_security_group.vinipostgresql.id] 11 | username = var.postgres_user 12 | password = var.postgres_user 13 | 14 | tags = { 15 | tag-key = "vini-cluster-postgres-etl-aws" 16 | } 17 | } 18 | 19 | data "aws_vpc" "default" { 20 | default = true 21 | } 22 | 23 | resource "aws_security_group" "vinipostgresql" { 24 | vpc_id 
= data.aws_vpc.default.id 25 | name = "vinipostgresql" 26 | 27 | ingress { 28 | from_port = 0 29 | to_port = 0 30 | protocol = "-1" 31 | cidr_blocks = ["0.0.0.0/0"] 32 | } 33 | 34 | tags = { 35 | tag-key = "sg-postgres" 36 | } 37 | } -------------------------------------------------------------------------------- /.github/workflows/verify.yml: -------------------------------------------------------------------------------- 1 | name: 'Terraform Verify' 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | 7 | jobs: 8 | pack_deploy: 9 | name: Build and Verify DataLake 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Set up Python 3.8 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.8 18 | 19 | - name: Build Lambda package 20 | run: | 21 | sh k8s/scripts/build_lambda_package.sh 22 | 23 | - name: HashiCorp - Setup Terraform 24 | uses: hashicorp/setup-terraform@v1.2.1 25 | 26 | - name: Configure AWS credentials 27 | uses: aws-actions/configure-aws-credentials@v1 28 | with: 29 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 30 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 31 | aws-region: us-east-1 32 | 33 | - name: IaC Apply 34 | env: 35 | COMMAND_IAC: terraform 36 | run: | 37 | cd k8s/resources 38 | $COMMAND_IAC init 39 | $COMMAND_IAC validate 40 | $COMMAND_IAC plan -------------------------------------------------------------------------------- /k8s/resources/eks.tf: -------------------------------------------------------------------------------- 1 | module "eks" { 2 | source = "terraform-aws-modules/eks/aws" 3 | version = "17.24.0" 4 | cluster_name = var.cluster_name 5 | cluster_version = "1.21" 6 | subnets = module.vpc.private_subnets 7 | 8 | tags = { 9 | Vini = "ETL-AWS" 10 | } 11 | 12 | vpc_id = module.vpc.vpc_id 13 | 14 | workers_group_defaults = { 15 | root_volume_type = "gp2" 16 | } 17 | 18 | worker_groups = [ 19 | { 20 | name = "worker-group-1" 21 | instance_type = "r5.xlarge" 
22 | asg_desired_capacity = 1 23 | additional_security_group_ids = [aws_security_group.worker_group_mgmt_one.id] 24 | }, 25 | { 26 | name = "worker-group-2" 27 | instance_type = "c5.2xlarge" 28 | additional_security_group_ids = [aws_security_group.worker_group_mgmt_two.id] 29 | asg_desired_capacity = 1 30 | } 31 | ] 32 | } 33 | 34 | data "aws_eks_cluster" "cluster" { 35 | name = module.eks.cluster_id 36 | } 37 | 38 | data "aws_eks_cluster_auth" "cluster" { 39 | name = module.eks.cluster_id 40 | } -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: 'Terraform Deploy' 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | jobs: 8 | pack_deploy: 9 | name: Build and deploy DataLake 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Set up Python 3.8 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.8 18 | 19 | - name: Build Lambda package 20 | run: | 21 | sh k8s/scripts/build_lambda_package.sh 22 | 23 | - name: HashiCorp - Setup Terraform 24 | uses: hashicorp/setup-terraform@v1.2.1 25 | 26 | - name: Configure AWS credentials 27 | uses: aws-actions/configure-aws-credentials@v1 28 | with: 29 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 30 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 31 | aws-region: us-east-1 32 | 33 | - name: IaC Apply 34 | env: 35 | COMMAND_IAC: terraform 36 | run: | 37 | cd k8s/resources 38 | $COMMAND_IAC init 39 | $COMMAND_IAC validate 40 | $COMMAND_IAC plan 41 | $COMMAND_IAC apply -auto-approve 42 | 43 | -------------------------------------------------------------------------------- /k8s/lambda-function/lambda_function.py: -------------------------------------------------------------------------------- 1 | import requests, io, tempfile, os, boto3 2 | from zipfile import ZipFile 3 | 4 | file_name = 'AdventureWorks.zip' 5 
| bucket = "landing-zone-vini-poc-etl-aws" 6 | folder_temp_name = 'temp' 7 | url = 'https://github.com/camposvinicius/data/raw/main/AdventureWorks.zip' 8 | 9 | def lambda_handler(event, context): 10 | 11 | with tempfile.TemporaryDirectory() as temp_path: 12 | temp_dir = os.path.join(temp_path, folder_temp_name) 13 | with open(temp_dir, 'wb') as f: 14 | req = requests.get(url) 15 | f.write(req.content) 16 | s3 = boto3.resource('s3') 17 | s3.Bucket(bucket).upload_file(temp_dir, file_name) 18 | 19 | zip_obj = s3.Object(bucket_name=bucket, key=file_name) 20 | buffer = io.BytesIO(zip_obj.get()["Body"].read()) 21 | 22 | z = ZipFile(buffer) 23 | for filename in z.namelist(): 24 | file_info = z.getinfo(filename) 25 | s3.meta.client.upload_fileobj( 26 | z.open(filename), 27 | Bucket=bucket, 28 | Key='data/' + f'{filename}') 29 | for file in s3.Bucket(bucket).objects.all(): 30 | print(file.key) -------------------------------------------------------------------------------- /.github/workflows/destroy.yml: -------------------------------------------------------------------------------- 1 | name: 'Terraform Destroy' 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | action: 7 | description: "Terraform Destroy Resources" 8 | required: false 9 | default: "destroy" 10 | 11 | jobs: 12 | pack_deploy: 13 | name: Build and Destroy DataLake 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Set up Python 3.8 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.8 22 | 23 | - name: Build Lambda package to Destroy... 
24 | run: | 25 | sh k8s/scripts/build_lambda_package.sh 26 | 27 | - name: HashiCorp - Setup Terraform 28 | uses: hashicorp/setup-terraform@v1.2.1 29 | 30 | - name: Configure AWS credentials 31 | uses: aws-actions/configure-aws-credentials@v1 32 | with: 33 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 34 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 35 | aws-region: us-east-1 36 | 37 | - name: IaC Apply 38 | env: 39 | COMMAND_IAC: terraform 40 | run: | 41 | cd k8s/resources 42 | $COMMAND_IAC init 43 | $COMMAND_IAC destroy -auto-approve 44 | -------------------------------------------------------------------------------- /k8s/resources/lambda.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "iam_for_lambda" { 2 | name = "iam_for_lambda" 3 | 4 | assume_role_policy = < /home/pgbouncer/users.txt 72 | echo "Successfully generated auth_file: /home/pgbouncer/users.txt" 73 | {{- end }} 74 | -------------------------------------------------------------------------------- /k8s/resources/apps.tf: -------------------------------------------------------------------------------- 1 | data "kubectl_file_documents" "namespace" { 2 | content = file("../charts/argocd/namespace.yaml") 3 | } 4 | resource "kubectl_manifest" "namespace" { 5 | count = length(data.kubectl_file_documents.namespace.documents) 6 | yaml_body = element(data.kubectl_file_documents.namespace.documents, count.index) 7 | override_namespace = "argocd" 8 | depends_on = [ 9 | data.kubectl_file_documents.namespace, 10 | module.eks 11 | ] 12 | } 13 | 14 | data "kubectl_file_documents" "argocd" { 15 | content = file("../charts/argocd/install.yaml") 16 | } 17 | 18 | resource "kubectl_manifest" "argocd" { 19 | count = length(data.kubectl_file_documents.argocd.documents) 20 | yaml_body = element(data.kubectl_file_documents.argocd.documents, count.index) 21 | override_namespace = "argocd" 22 | depends_on = [ 23 | 
kubectl_manifest.namespace, 24 | data.kubectl_file_documents.argocd, 25 | module.eks 26 | ] 27 | } 28 | 29 | data "kubectl_file_documents" "git" { 30 | content = file("../charts/argocd/auth.yaml") 31 | } 32 | 33 | resource "kubectl_manifest" "git" { 34 | count = length(data.kubectl_file_documents.git.documents) 35 | yaml_body = element(data.kubectl_file_documents.git.documents, count.index) 36 | override_namespace = "argocd" 37 | depends_on = [ 38 | kubectl_manifest.argocd, 39 | data.kubectl_file_documents.git 40 | ] 41 | } 42 | 43 | data "kubectl_file_documents" "airflow_key" { 44 | content = file("../airflow_access_git_repo/ssh.yaml") 45 | } 46 | 47 | resource "kubectl_manifest" "airflow_manifest" { 48 | count = length(data.kubectl_file_documents.airflow_key.documents) 49 | yaml_body = element(data.kubectl_file_documents.airflow_key.documents, count.index) 50 | override_namespace = "airflow" 51 | depends_on = [ 52 | kubectl_manifest.argocd, 53 | data.kubectl_file_documents.airflow_key 54 | ] 55 | } 56 | 57 | data "kubectl_file_documents" "airflow" { 58 | content = file("../apps/airflow-app.yaml") 59 | } 60 | 61 | resource "kubectl_manifest" "airflow" { 62 | count = length(data.kubectl_file_documents.airflow.documents) 63 | yaml_body = element(data.kubectl_file_documents.airflow.documents, count.index) 64 | override_namespace = "argocd" 65 | depends_on = [ 66 | kubectl_manifest.argocd, 67 | data.kubectl_file_documents.airflow, 68 | module.eks 69 | ] 70 | } 71 | 72 | data "kubectl_file_documents" "keys" { 73 | content = file("../secrets/keys.yml") 74 | } 75 | 76 | resource "kubectl_manifest" "keys" { 77 | count = length(data.kubectl_file_documents.keys.documents) 78 | yaml_body = element(data.kubectl_file_documents.keys.documents, count.index) 79 | override_namespace = "airflow" 80 | depends_on = [ 81 | data.kubectl_file_documents.keys, 82 | data.kubectl_file_documents.airflow, 83 | kubectl_manifest.argocd, 84 | kubectl_manifest.airflow 85 | ] 86 | } 
-------------------------------------------------------------------------------- /k8s/codes/transformation.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | from pyspark.sql import SparkSession 5 | from variables import PATH_TARGET, PATH_CURATED, QUERY, VIEWS 6 | 7 | class ServeData: 8 | def __init__(self, spark) -> None: 9 | self.spark = spark 10 | self.path_target = PATH_TARGET 11 | self.path_curated = PATH_CURATED 12 | self.query = QUERY 13 | 14 | def run(self) -> str: 15 | self.create_logger() 16 | self.to_curated() 17 | 18 | return "Application completed. Going out..." 19 | 20 | def create_logger(self): 21 | logging.basicConfig(format='%(name)s - %(asctime)s %(message)s', 22 | datefmt='%m/%d/%Y %I:%M:%S %p', stream=sys.stdout) 23 | logger = logging.getLogger('ETL_AWS_VINICIUS_CAMPOS') 24 | logger.setLevel(logging.DEBUG) 25 | 26 | def to_curated(self): 27 | 28 | views_to_drop = [] 29 | 30 | for view in VIEWS: 31 | print(view) 32 | ( 33 | spark.read.format("parquet") 34 | .load(f'{self.path_target}'.format(file=view)) 35 | .createOrReplaceTempView(f'{view}') 36 | ) 37 | views_to_drop.append(view) 38 | 39 | print(views_to_drop) 40 | 41 | df = spark.sql(self.query['QUERY']) 42 | 43 | for view in views_to_drop: 44 | spark.catalog.dropTempView(f"{view}") 45 | 46 | df.cache() 47 | 48 | ( 49 | df.coalesce(1) 50 | .write.format("parquet") 51 | .mode("overwrite") 52 | .save(self.path_curated) 53 | ) 54 | 55 | if __name__ == "__main__": 56 | 57 | spark = ( 58 | SparkSession.builder.appName('ETL_AWS_VINICIUS_CAMPOS') 59 | .enableHiveSupport() 60 | .config('spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version', '2') 61 | .config('spark.speculation', 'false') 62 | .config('spark.sql.adaptive.enabled', 'true') 63 | .config('spark.shuffle.service.enabled', 'true') 64 | .config('spark.dynamicAllocation.enabled', 'true') 65 | .config('spark.sql.adaptive.coalescePartitions.enabled', 'true') 66 | 
.config('spark.sql.adaptive.coalescePartitions.minPartitionNum', '1') 67 | .config('spark.sql.adaptive.coalescePartitions.initialPartitionNum', '10') 68 | .config('spark.sql.adaptive.advisoryPartitionSizeInBytes', '134217728') 69 | .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer') 70 | .config('spark.dynamicAllocation.minExecutors', "5") 71 | .config('spark.dynamicAllocation.maxExecutors', "30") 72 | .config('spark.dynamicAllocation.initialExecutors', "10") 73 | .config('spark.sql.debug.maxToStringFields', '300') 74 | .config('spark.sql.join.preferSortMergeJoin', 'true') 75 | .getOrCreate() 76 | ) 77 | 78 | spark.sparkContext.setLogLevel("ERROR") 79 | 80 | m = ServeData(spark) 81 | 82 | m.run() 83 | 84 | spark.stop() 85 | -------------------------------------------------------------------------------- /k8s/codes/variables.py: -------------------------------------------------------------------------------- 1 | PATH_SOURCE = 's3://landing-zone-vini-poc-etl-aws/data/AdventureWorks/{file}.csv' 2 | PATH_TARGET = 's3://processing-zone-vini-poc-etl-aws/processing/AdventureWorks_AdventureWorks_{file}' 3 | PATH_CURATED = 's3://curated-zone-vini-poc-etl-aws/curated/' 4 | 5 | VIEWS = [ 6 | 'Customers', 7 | 'Product_Categories', 8 | 'Product_Subcategories', 9 | 'Products', 10 | 'Returns', 11 | 'Sales_2015', 12 | 'Sales_2016', 13 | 'Sales_2017' 14 | ] 15 | 16 | QUERY = { 17 | 18 | 'QUERY': """ 19 | WITH all_sales ( 20 | SELECT * FROM Sales_2015 21 | UNION ALL 22 | SELECT * FROM Sales_2016 23 | UNION ALL 24 | SELECT * FROM Sales_2017 25 | ), info as ( 26 | SELECT 27 | cast(from_unixtime(unix_timestamp(a.OrderDate, 'M/d/yyyy'), 'yyyy-MM-dd') as date) as OrderDate, 28 | cast(from_unixtime(unix_timestamp(a.StockDate, 'M/d/yyyy'), 'yyyy-MM-dd') as date) as StockDate, 29 | cast(a.CustomerKey as int) as CustomerKey, 30 | cast(a.TerritoryKey as int) as TerritoryKey, 31 | cast(a.OrderLineItem as int) as OrderLineItem, 32 | cast(a.OrderQuantity as int) as 
OrderQuantity, 33 | b.Prefix, 34 | b.FirstName, 35 | b.LastName, 36 | cast(from_unixtime(unix_timestamp(b.BirthDate, 'M/d/yyyy'), 'yyyy-MM-dd') as date) as BirthDate, 37 | b.MaritalStatus, 38 | b.Gender, 39 | b.EmailAddress, 40 | cast(replace(replace(b.AnnualIncome, "$", ""), ",", "") as decimal(10,2)) as AnnualIncome, 41 | cast(b.TotalChildren as int) as TotalChildren, 42 | b.EducationLevel, 43 | b.Occupation, 44 | b.HomeOwner, 45 | cast(c.ProductKey as int) as ProductKey, 46 | cast(d.ProductSubcategoryKey as int) as ProductSubcategoryKey, 47 | d.SubcategoryName, 48 | cast(d.ProductCategoryKey as int) as ProductCategoryKey, 49 | e.CategoryName, 50 | c.ProductSKU, 51 | c.ProductName, 52 | c.ModelName, 53 | c.ProductDescription, 54 | c.ProductColor, 55 | cast(c.ProductSize as int) as ProductSize, 56 | c.ProductStyle, 57 | cast(c.ProductCost as decimal(10,2)) as ProductCost , 58 | cast(c.ProductPrice as decimal(10,2)) as ProductPrice, 59 | cast(from_unixtime(unix_timestamp(f.ReturnDate, 'M/d/yyyy'), 'yyyy-MM-dd') as date) as ReturnDate, 60 | NVL(cast(f.ReturnQuantity as int),0) as ReturnQuantity 61 | FROM 62 | all_sales a 63 | LEFT JOIN 64 | Customers b 65 | ON 66 | a.CustomerKey = b.CustomerKey 67 | LEFT JOIN 68 | Products c 69 | ON 70 | a.ProductKey = c.ProductKey 71 | LEFT JOIN 72 | Product_Subcategories d 73 | ON 74 | c.ProductSubcategoryKey = d.ProductSubcategoryKey 75 | LEFT JOIN 76 | Product_Categories e 77 | ON 78 | d.ProductCategoryKey = e.ProductCategoryKey 79 | LEFT JOIN 80 | Returns f 81 | ON 82 | a.TerritoryKey = f.TerritoryKey AND 83 | c.ProductKey = f.ProductKey 84 | ) 85 | SELECT 86 | * 87 | FROM 88 | info 89 | """ 90 | } 91 | -------------------------------------------------------------------------------- /k8s/codes/csv-to-parquet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import ast 4 | 5 | import pyspark.sql.functions as f 6 | 7 | from pyspark.sql import SparkSession 8 
| from variables import PATH_SOURCE, PATH_TARGET 9 | 10 | class CSVtoPARQUET: 11 | def __init__(self, spark, path_source:str, format_source: str, path_target:str, format_target: str) -> None: 12 | self.spark = spark 13 | 14 | if format_source != 'csv': 15 | raise Exception(f"The format_source {format_source} is not supported. Use CSV.") 16 | elif format_target != 'parquet': 17 | raise Exception(f"The format_target {format_target} is not supported. Use PARQUET.") 18 | else: 19 | self.format_source = format_source 20 | self.format_target = format_target 21 | 22 | self.path_source = path_source 23 | self.path_target = path_target 24 | 25 | def run(self) -> str: 26 | self.create_logger() 27 | self.csv_to_parquet() 28 | 29 | return "Application completed. Going out..." 30 | 31 | def create_logger(self): 32 | logging.basicConfig(format='%(name)s - %(asctime)s %(message)s', 33 | datefmt='%m/%d/%Y %I:%M:%S %p', stream=sys.stdout) 34 | logger = logging.getLogger('ETL_AWS_VINICIUS_CAMPOS') 35 | logger.setLevel(logging.DEBUG) 36 | 37 | def csv_to_parquet(self): 38 | df = ( 39 | self.spark.read.format(self.format_source) 40 | .option("sep", ",") 41 | .option("header", True) 42 | .option("encoding", "utf-8") 43 | .load(self.path_source) 44 | ) 45 | 46 | return df.coalesce(1).write.mode("overwrite").format(self.format_target).save(self.path_target) 47 | 48 | if __name__ == "__main__": 49 | 50 | spark = ( 51 | SparkSession.builder.appName('ETL_AWS_VINICIUS_CAMPOS') 52 | .enableHiveSupport() 53 | .config('spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version', '2') 54 | .config('spark.speculation', 'false') 55 | .config('spark.sql.adaptive.enabled', 'true') 56 | .config('spark.shuffle.service.enabled', 'true') 57 | .config('spark.dynamicAllocation.enabled', 'true') 58 | .config('spark.sql.adaptive.coalescePartitions.enabled', 'true') 59 | .config('spark.sql.adaptive.coalescePartitions.minPartitionNum', '1') 60 | 
.config('spark.sql.adaptive.coalescePartitions.initialPartitionNum', '10') 61 | .config('spark.sql.adaptive.advisoryPartitionSizeInBytes', '134217728') 62 | .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer') 63 | .config('spark.dynamicAllocation.minExecutors', "5") 64 | .config('spark.dynamicAllocation.maxExecutors', "30") 65 | .config('spark.dynamicAllocation.initialExecutors', "10") 66 | .config('spark.sql.debug.maxToStringFields', '300') 67 | .config('spark.sql.join.preferSortMergeJoin', 'true') 68 | .getOrCreate() 69 | ) 70 | 71 | spark.sparkContext.setLogLevel("ERROR") 72 | 73 | script_input = ast.literal_eval(sys.argv[1]) 74 | 75 | file = script_input['file'] 76 | format_source = script_input['format_source'] 77 | format_target = script_input['format_target'] 78 | 79 | m = CSVtoPARQUET( 80 | spark, 81 | PATH_SOURCE.format(file=file), 82 | format_source, 83 | PATH_TARGET.format(file=file), 84 | format_target 85 | ) 86 | 87 | m.run() 88 | 89 | spark.stop() -------------------------------------------------------------------------------- /k8s/resources/cluster-redshift.tf: -------------------------------------------------------------------------------- 1 | resource "aws_vpc" "redshift_vpc" { 2 | cidr_block = "10.0.0.0/16" 3 | instance_tenancy = "default" 4 | 5 | tags = { 6 | Name = "redshift-vpc" 7 | } 8 | } 9 | 10 | resource "aws_internet_gateway" "redshift_vpc_gw" { 11 | vpc_id = aws_vpc.redshift_vpc.id 12 | 13 | depends_on = [ 14 | aws_vpc.redshift_vpc 15 | ] 16 | } 17 | 18 | resource "aws_default_security_group" "redshift_security_group" { 19 | vpc_id = aws_vpc.redshift_vpc.id 20 | 21 | ingress { 22 | from_port = 0 23 | to_port = 0 24 | protocol = "-1" 25 | cidr_blocks = ["0.0.0.0/0"] 26 | } 27 | 28 | tags = { 29 | Name = "redshift-sg" 30 | } 31 | 32 | depends_on = [ 33 | aws_vpc.redshift_vpc 34 | ] 35 | } 36 | 37 | resource "aws_subnet" "redshift_subnet_1" { 38 | vpc_id = aws_vpc.redshift_vpc.id 39 | cidr_block = "10.0.1.0/28" 40 | 
availability_zone = "us-east-1a" 41 | map_public_ip_on_launch = "true" 42 | 43 | tags = { 44 | Name = "redshift-subnet-1" 45 | } 46 | 47 | depends_on = [ 48 | aws_vpc.redshift_vpc 49 | ] 50 | } 51 | 52 | resource "aws_subnet" "redshift_subnet_2" { 53 | vpc_id = aws_vpc.redshift_vpc.id 54 | cidr_block = "10.0.32.0/20" 55 | availability_zone = "us-east-1a" 56 | map_public_ip_on_launch = "true" 57 | 58 | tags = { 59 | Name = "redshift-subnet-2" 60 | } 61 | 62 | depends_on = [ 63 | aws_vpc.redshift_vpc 64 | ] 65 | } 66 | 67 | resource "aws_redshift_subnet_group" "redshift_subnet_group" { 68 | name = "redshift-subnet-group" 69 | 70 | subnet_ids = [ 71 | aws_subnet.redshift_subnet_1.id, 72 | aws_subnet.redshift_subnet_2.id 73 | ] 74 | 75 | tags = { 76 | environment = "vini-etl-aws" 77 | Name = "redshift-subnet-group" 78 | } 79 | } 80 | 81 | 82 | resource "aws_iam_role_policy" "s3_full_access_policy" { 83 | name = "redshift_s3_policy" 84 | 85 | role = aws_iam_role.redshift_role.id 86 | policy = < str: 34 | return string_substitution(self._val, VAR__TEMPLATE_VALUE_CACHE) 35 | 36 | def as_variable(self) -> Variable: 37 | return Variable( 38 | key=self.key, 39 | val=self.val 40 | ) 41 | 42 | 43 | ############### 44 | ## Variables ## 45 | ############### 46 | VAR__TEMPLATE_NAMES = [ 47 | {{- range $k, $v := .Values.airflow.variablesTemplates }} 48 | {{ $k | quote }}, 49 | {{- end }} 50 | ] 51 | VAR__TEMPLATE_MTIME_CACHE = {} 52 | VAR__TEMPLATE_VALUE_CACHE = {} 53 | VAR__VARIABLE_WRAPPERS = { 54 | {{- range .Values.airflow.variables }} 55 | {{ .key | quote }}: VariableWrapper( 56 | key={{ (required "each `key` in `airflow.variables` must be non-empty!" .key) | quote }}, 57 | val={{ (required "each `value` in `airflow.variables` must be non-empty!" 
.value) | quote }}, 58 | ), 59 | {{- end }} 60 | } 61 | 62 | 63 | ############### 64 | ## Functions ## 65 | ############### 66 | def compare_variables(v1: Variable, v2: Variable) -> bool: 67 | """ 68 | Check if two Variable objects are identical. 69 | """ 70 | return v1.key == v2.key and v1.val == v2.val 71 | 72 | 73 | def sync_variable(variable_wrapper: VariableWrapper) -> None: 74 | """ 75 | Sync the Variable defined by a provided VariableWrapper into the airflow DB. 76 | """ 77 | v_key = variable_wrapper.key 78 | v_new = variable_wrapper.as_variable() 79 | 80 | variable_added = False 81 | variable_updated = False 82 | 83 | with create_session() as session: 84 | v_old = session.query(Variable).filter(Variable.key == v_key).first() 85 | if not v_old: 86 | logging.info(f"Variable=`{v_key}` is missing, adding...") 87 | session.add(v_new) 88 | variable_added = True 89 | else: 90 | if compare_variables(v_new, v_old): 91 | pass 92 | else: 93 | logging.info(f"Variable=`{v_key}` exists but has changed, updating...") 94 | v_old.val = v_new.val 95 | variable_updated = True 96 | 97 | if variable_added: 98 | logging.info(f"Variable=`{v_key}` was successfully added.") 99 | if variable_updated: 100 | logging.info(f"Variable=`{v_key}` was successfully updated.") 101 | 102 | 103 | def sync_all_variables(variable_wrappers: Dict[str, VariableWrapper]) -> None: 104 | """ 105 | Sync all variables in provided `variable_wrappers`. 106 | """ 107 | logging.info("BEGIN: airflow variables sync") 108 | for variable_wrapper in variable_wrappers.values(): 109 | sync_variable(variable_wrapper) 110 | logging.info("END: airflow variables sync") 111 | 112 | 113 | def sync_with_airflow() -> None: 114 | """ 115 | Perform a sync of all objects with airflow (note, `sync_with_airflow()` is called in `main()` template).
{{/*
The base name shared by all resources in this chart (the helm release name).
It is cut to 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec),
and a trailing "-" left by the cut is removed.
*/}}
{{- define "airflow.fullname" -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
The path containing DAG files.
- with git-sync enabled: "<dags.path>/repo/<dags.gitSync.repoSubPath>" (slashes around the joined parts are trimmed)
- otherwise: `dags.path` exactly as given
*/}}
{{- define "airflow.dags.path" -}}
{{- if .Values.dags.gitSync.enabled -}}
{{- printf "%s/repo/%s" (.Values.dags.path | trimSuffix "/") (.Values.dags.gitSync.repoSubPath | trimAll "/") -}}
{{- else -}}
{{- /* emit the value directly: passing it to `printf` as the format string would mangle any "%" in the path */ -}}
{{- .Values.dags.path -}}
{{- end -}}
{{- end -}}
{{/*
Construct the `redis.fullname` of the redis sub-chart.
Used to discover the master Service and Secret name created by the sub-chart.

Precedence:
1. `redis.fullnameOverride`, if set
2. the release name alone, if it already contains the sub-chart name
3. "<release name>-<sub-chart name>"
The result is cut to 63 chars and a trailing "-" is removed.
*/}}
{{- define "airflow.redis.fullname" -}}
{{- $subChart := .Values.redis.nameOverride | default "redis" -}}
{{- $base := "" -}}
{{- if .Values.redis.fullnameOverride -}}
{{- $base = .Values.redis.fullnameOverride -}}
{{- else if contains $subChart .Release.Name -}}
{{- $base = .Release.Name -}}
{{- else -}}
{{- $base = printf "%s-%s" .Release.Name $subChart -}}
{{- end -}}
{{- $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}
def compare_pools(p1: Pool, p2: Pool) -> bool:
    """
    Check if two Pool objects are identical.

    Pools are identical when their name (`pool`), `description` and `slots` all match.

    :param p1: the first Pool
    :param p2: the second Pool
    :return: True if all three attributes are equal, otherwise False
    """
    return (
        # the name must be checked across BOTH objects, not against itself
        p1.pool == p2.pool
        and p1.description == p2.description
        and p1.slots == p2.slots
    )
# modified from https://github.com/apache/airflow/blob/2.1.0/airflow/utils/db.py#L583-L592
def _get_alembic_config():
    """
    Build the alembic `Config` for the migrations shipped inside the installed `airflow` package.

    :return: a `Config` loaded from airflow's `alembic.ini`, with `script_location` and `sqlalchemy.url` set
    """
    from alembic.config import Config

    airflow_dir = os.path.abspath(os.path.dirname(airflow.__file__))
    alembic_config = Config(os.path.join(airflow_dir, 'alembic.ini'))

    # every value has its literal `%` doubled before being stored as a main option
    main_options = {
        'script_location': os.path.join(airflow_dir, 'migrations'),
        'sqlalchemy.url': settings.SQL_ALCHEMY_CONN,
    }
    for option_name, option_value in main_options.items():
        alembic_config.set_main_option(option_name, option_value.replace('%', '%%'))
    return alembic_config
def needs_db_migrations() -> bool:
    """
    Return a boolean representing if the database has unapplied migrations.

    `check_migrations` is called with a timeout of 0, and a `TimeoutError` from it is treated as
    "there are unapplied migrations". The alembic migration logger is set to WARN for the duration
    of the check and put back to its previous level afterwards.

    :return: True if there are unapplied migrations, otherwise False
    """
    log_alembic = logging.getLogger("alembic.runtime.migration")
    log_alembic_level = log_alembic.level
    try:
        log_alembic.setLevel("WARN")
        check_migrations(0)
        return False
    except TimeoutError:
        return True
    finally:
        # restore the level on EVERY exit path (including the TimeoutError one),
        # otherwise the logger stays at WARN while the pending migrations are applied
        log_alembic.setLevel(log_alembic_level)
def main(sync_forever: bool):
    """
    Apply pending DB migrations once, then optionally keep re-checking forever.

    :param sync_forever: if False, return after the initial check & apply;
                         if True, re-check every `CONF__CHECK_MIGRATIONS_INTERVAL` seconds and never return
    """
    # initial check & apply
    if not needs_db_migrations():
        log.info("there are no unapplied db migrations, continuing...")
    else:
        log.warning("there are unapplied db migrations, triggering apply...")
        apply_db_migrations()

    # a one-off run stops here
    if not sync_forever:
        return

    # time of the most recent migrations check
    last_check = time.time()

    while True:
        seconds_since_check = time.time() - last_check
        if seconds_since_check > CONF__CHECK_MIGRATIONS_INTERVAL:
            log.debug(f"check interval reached, checking for unapplied db migrations...")
            if needs_db_migrations():
                log.warning("there are unapplied db migrations, triggering apply...")
                apply_db_migrations()
            last_check = time.time()

        # ensure we dont loop too fast
        time.sleep(0.5)
def string_substitution(raw_string: Optional[str], substitution_map: Optional[Dict[str, str]]) -> Optional[str]:
    """
    Apply bash-like substitutions to a raw string.

    Placeholders which have no entry in `substitution_map` are left untouched (`Template.safe_substitute`).

    Example:
    - string_substitution("Hello!", None) -> "Hello!"
    - string_substitution("Hello ${NAME}!", {"NAME": "Airflow"}) -> "Hello Airflow!"

    :param raw_string: the string to apply substitutions to (may be None or empty)
    :param substitution_map: mapping of placeholder name to replacement value (may be None or empty)
    :return: the substituted string, or `raw_string` unchanged when there is nothing to substitute
    """
    # truthiness covers both None and an empty map, so `None` never reaches `len()`
    if raw_string and substitution_map:
        return Template(raw_string).safe_substitute(substitution_map)
    return raw_string
{{- /*
Job which runs `/mnt/scripts/db_migrations.py` as a helm post-install/post-upgrade hook.
Only rendered when both `airflow.dbMigrations.enabled` and `airflow.dbMigrations.runAsJob` are set.
*/}}
{{- if and (.Values.airflow.dbMigrations.enabled) (.Values.airflow.dbMigrations.runAsJob) }}
{{- /* pod-level settings, rendered once from the `airflow.dbMigrations.*` values and reused below */}}
{{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.dbMigrations.nodeSelector) }}
{{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.dbMigrations.affinity) }}
{{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.dbMigrations.tolerations) }}
{{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.dbMigrations.securityContext) }}
{{- $extraPipPackages := .Values.airflow.extraPipPackages }}
{{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }}
{{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "airflow.fullname" . }}-db-migrations
  labels:
    app: {{ include "airflow.labels.app" . }}
    component: db-migrations
    chart: {{ include "airflow.labels.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  annotations:
    ## run as a helm hook after install/upgrade; the previous hook Job is deleted before a new one is created
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "-100"
    helm.sh/hook-delete-policy: before-hook-creation
    {{- if .Values.airflow.dbMigrations.annotations }}
    {{- toYaml .Values.airflow.dbMigrations.annotations | nindent 4 }}
    {{- end }}
spec:
  template:
    metadata:
      annotations:
        ## hashes of the rendered config/script, so the pod template changes whenever they change
        checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }}
        checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }}
        checksum/db-migrations-script: {{ include "airflow.db_migrations.db_migrations.py" . | sha256sum }}
        {{- if .Values.airflow.podAnnotations }}
        {{- toYaml .Values.airflow.podAnnotations | nindent 8 }}
        {{- end }}
        {{- if .Values.airflow.dbMigrations.podAnnotations }}
        {{- toYaml .Values.airflow.dbMigrations.podAnnotations | nindent 8 }}
        {{- end }}
        {{- if .Values.airflow.dbMigrations.safeToEvict }}
        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
        {{- end }}
      labels:
        app: {{ include "airflow.labels.app" . }}
        component: db-migrations
        release: {{ .Release.Name }}
        {{- if .Values.airflow.dbMigrations.podLabels }}
        {{- toYaml .Values.airflow.dbMigrations.podLabels | nindent 8 }}
        {{- end }}
    spec:
      restartPolicy: OnFailure
      {{- if .Values.airflow.image.pullSecret }}
      imagePullSecrets:
        - name: {{ .Values.airflow.image.pullSecret }}
      {{- end }}
      {{- if $podNodeSelector }}
      nodeSelector:
        {{- $podNodeSelector | nindent 8 }}
      {{- end }}
      {{- if $podAffinity }}
      affinity:
        {{- $podAffinity | nindent 8 }}
      {{- end }}
      {{- if $podTolerations }}
      tolerations:
        {{- $podTolerations | nindent 8 }}
      {{- end }}
      {{- if $podSecurityContext }}
      securityContext:
        {{- $podSecurityContext | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "airflow.serviceAccountName" . }}
      initContainers:
        {{- if $extraPipPackages }}
        {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }}
        {{- end }}
        {{- if .Values.dags.gitSync.enabled }}
        ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo
        {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }}
        {{- end }}
        {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }}
      containers:
        - name: db-migrations
          {{- include "airflow.image" . | indent 10 }}
          resources:
            {{- toYaml .Values.airflow.dbMigrations.resources | nindent 12 }}
          envFrom:
            {{- include "airflow.envFrom" . | indent 12 }}
          env:
            {{- include "airflow.env" . | indent 12 }}
          command:
            {{- include "airflow.command" . | indent 12 }}
          args:
            - "python"
            - "-u"
            - "/mnt/scripts/db_migrations.py"
          volumeMounts:
            {{- $volumeMounts | indent 12 }}
            ## the migrations script, mounted from the `scripts` secret volume below
            - name: scripts
              mountPath: /mnt/scripts
              readOnly: true
      volumes:
        {{- $volumes | indent 8 }}
        - name: scripts
          secret:
            secretName: {{ include "airflow.fullname" . }}-db-migrations
{{- end }}
}} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | annotations: 20 | helm.sh/hook: post-install,post-upgrade 21 | helm.sh/hook-weight: "0" 22 | helm.sh/hook-delete-policy: before-hook-creation 23 | {{- if .Values.airflow.sync.annotations }} 24 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 25 | {{- end }} 26 | spec: 27 | template: 28 | metadata: 29 | annotations: 30 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 31 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 32 | checksum/sync-pools-script: {{ include "airflow.sync.sync_pools.py" . | sha256sum }} 33 | {{- if .Values.airflow.podAnnotations }} 34 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 35 | {{- end }} 36 | {{- if .Values.airflow.sync.podAnnotations }} 37 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 38 | {{- end }} 39 | {{- if .Values.airflow.sync.safeToEvict }} 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | {{- end }} 42 | labels: 43 | app: {{ include "airflow.labels.app" . 
}} 44 | component: sync-pools 45 | release: {{ .Release.Name }} 46 | {{- if .Values.airflow.sync.podLabels }} 47 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 48 | {{- end }} 49 | spec: 50 | restartPolicy: OnFailure 51 | {{- if .Values.airflow.image.pullSecret }} 52 | imagePullSecrets: 53 | - name: {{ .Values.airflow.image.pullSecret }} 54 | {{- end }} 55 | {{- if $podNodeSelector }} 56 | nodeSelector: 57 | {{- $podNodeSelector | nindent 8 }} 58 | {{- end }} 59 | {{- if $podAffinity }} 60 | affinity: 61 | {{- $podAffinity | nindent 8 }} 62 | {{- end }} 63 | {{- if $podTolerations }} 64 | tolerations: 65 | {{- $podTolerations | nindent 8 }} 66 | {{- end }} 67 | {{- if $podSecurityContext }} 68 | securityContext: 69 | {{- $podSecurityContext | nindent 8 }} 70 | {{- end }} 71 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 72 | initContainers: 73 | {{- if $extraPipPackages }} 74 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 75 | {{- end }} 76 | {{- if .Values.dags.gitSync.enabled }} 77 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 78 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 79 | {{- end }} 80 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 81 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 82 | containers: 83 | - name: sync-airflow-pools 84 | {{- include "airflow.image" . | indent 10 }} 85 | resources: 86 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 87 | envFrom: 88 | {{- include "airflow.envFrom" . | indent 12 }} 89 | env: 90 | {{- include "airflow.env" . 
{{- /*
Deployment which keeps running `/mnt/scripts/sync_pools.py`.
Only rendered when `airflow.pools` is non-empty and `airflow.poolsUpdate` is set.
*/}}
{{- if and (.Values.airflow.pools) (.Values.airflow.poolsUpdate) }}
{{- /* pod-level settings, rendered once from the `airflow.sync.*` values and reused below */}}
{{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }}
{{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }}
{{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }}
{{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }}
{{- $extraPipPackages := .Values.airflow.extraPipPackages }}
{{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }}
{{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "airflow.fullname" . }}-sync-pools
  labels:
    app: {{ include "airflow.labels.app" . }}
    component: sync-pools
    chart: {{ include "airflow.labels.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  {{- if .Values.airflow.sync.annotations }}
  annotations:
    {{- toYaml .Values.airflow.sync.annotations | nindent 4 }}
  {{- end }}
spec:
  replicas: 1
  strategy:
    ## only 1 replica should run at a time
    type: Recreate
  selector:
    matchLabels:
      app: {{ include "airflow.labels.app" . }}
      component: sync-pools
      release: {{ .Release.Name }}
  template:
    metadata:
      annotations:
        ## hashes of the rendered config/script, so the pod template changes whenever they change
        checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }}
        checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }}
        checksum/sync-pools-script: {{ include "airflow.sync.sync_pools.py" . | sha256sum }}
        {{- if .Values.airflow.podAnnotations }}
        {{- toYaml .Values.airflow.podAnnotations | nindent 8 }}
        {{- end }}
        {{- if .Values.airflow.sync.podAnnotations }}
        {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }}
        {{- end }}
        {{- if .Values.airflow.sync.safeToEvict }}
        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
        {{- end }}
      labels:
        app: {{ include "airflow.labels.app" . }}
        component: sync-pools
        release: {{ .Release.Name }}
        {{- if .Values.airflow.sync.podLabels }}
        {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }}
        {{- end }}
    spec:
      restartPolicy: Always
      {{- if .Values.airflow.image.pullSecret }}
      imagePullSecrets:
        - name: {{ .Values.airflow.image.pullSecret }}
      {{- end }}
      {{- if $podNodeSelector }}
      nodeSelector:
        {{- $podNodeSelector | nindent 8 }}
      {{- end }}
      {{- if $podAffinity }}
      affinity:
        {{- $podAffinity | nindent 8 }}
      {{- end }}
      {{- if $podTolerations }}
      tolerations:
        {{- $podTolerations | nindent 8 }}
      {{- end }}
      {{- if $podSecurityContext }}
      securityContext:
        {{- $podSecurityContext | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "airflow.serviceAccountName" . }}
      initContainers:
        {{- if $extraPipPackages }}
        {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }}
        {{- end }}
        {{- if .Values.dags.gitSync.enabled }}
        ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo
        {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }}
        {{- end }}
        {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }}
        {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }}
      containers:
        - name: sync-airflow-pools
          {{- include "airflow.image" . | indent 10 }}
          resources:
            {{- toYaml .Values.airflow.sync.resources | nindent 12 }}
          envFrom:
            {{- include "airflow.envFrom" . | indent 12 }}
          env:
            {{- include "airflow.env" . | indent 12 }}
          command:
            {{- include "airflow.command" . | indent 12 }}
          args:
            - "python"
            - "-u"
            - "/mnt/scripts/sync_pools.py"
          volumeMounts:
            {{- $volumeMounts | indent 12 }}
            ## the sync script, mounted from the `scripts` secret volume below
            - name: scripts
              mountPath: /mnt/scripts
              readOnly: true
        {{- if .Values.dags.gitSync.enabled }}
        ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo
        {{- include "airflow.container.git_sync" . | indent 8 }}
        {{- end }}
      volumes:
        {{- $volumes | indent 8 }}
        - name: scripts
          secret:
            secretName: {{ include "airflow.fullname" . }}-sync-pools
{{- end }}
$extraPipPackages) }} 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-db-migrations 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: db-migrations 16 | chart: {{ include "airflow.labels.chart" . }} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | {{- if .Values.airflow.dbMigrations.annotations }} 20 | annotations: 21 | {{- toYaml .Values.airflow.dbMigrations.annotations | nindent 4 }} 22 | {{- end }} 23 | spec: 24 | replicas: 1 25 | strategy: 26 | ## only 1 replica should run at a time 27 | type: Recreate 28 | selector: 29 | matchLabels: 30 | app: {{ include "airflow.labels.app" . }} 31 | component: db-migrations 32 | release: {{ .Release.Name }} 33 | template: 34 | metadata: 35 | annotations: 36 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 37 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 38 | checksum/db-migrations-script: {{ include "airflow.db_migrations.db_migrations.py" . | sha256sum }} 39 | {{- if .Values.airflow.podAnnotations }} 40 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 41 | {{- end }} 42 | {{- if .Values.airflow.dbMigrations.podAnnotations }} 43 | {{- toYaml .Values.airflow.dbMigrations.podAnnotations | nindent 8 }} 44 | {{- end }} 45 | {{- if .Values.airflow.dbMigrations.safeToEvict }} 46 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 47 | {{- end }} 48 | labels: 49 | app: {{ include "airflow.labels.app" . 
}} 50 | component: db-migrations 51 | release: {{ .Release.Name }} 52 | {{- if .Values.airflow.dbMigrations.podLabels }} 53 | {{- toYaml .Values.airflow.dbMigrations.podLabels | nindent 8 }} 54 | {{- end }} 55 | spec: 56 | restartPolicy: Always 57 | {{- if .Values.airflow.image.pullSecret }} 58 | imagePullSecrets: 59 | - name: {{ .Values.airflow.image.pullSecret }} 60 | {{- end }} 61 | {{- if $podNodeSelector }} 62 | nodeSelector: 63 | {{- $podNodeSelector | nindent 8 }} 64 | {{- end }} 65 | {{- if $podAffinity }} 66 | affinity: 67 | {{- $podAffinity | nindent 8 }} 68 | {{- end }} 69 | {{- if $podTolerations }} 70 | tolerations: 71 | {{- $podTolerations | nindent 8 }} 72 | {{- end }} 73 | {{- if $podSecurityContext }} 74 | securityContext: 75 | {{- $podSecurityContext | nindent 8 }} 76 | {{- end }} 77 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 78 | initContainers: 79 | {{- if $extraPipPackages }} 80 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 81 | {{- end }} 82 | {{- if .Values.dags.gitSync.enabled }} 83 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 84 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 85 | {{- end }} 86 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 87 | containers: 88 | - name: db-migrations 89 | {{- include "airflow.image" . | indent 10 }} 90 | resources: 91 | {{- toYaml .Values.airflow.dbMigrations.resources | nindent 12 }} 92 | envFrom: 93 | {{- include "airflow.envFrom" . | indent 12 }} 94 | env: 95 | {{- include "airflow.env" . | indent 12 }} 96 | command: 97 | {{- include "airflow.command" . 
| indent 12 }} 98 | args: 99 | - "python" 100 | - "-u" 101 | - "/mnt/scripts/db_migrations.py" 102 | volumeMounts: 103 | {{- $volumeMounts | indent 12 }} 104 | - name: scripts 105 | mountPath: /mnt/scripts 106 | readOnly: true 107 | {{- if .Values.dags.gitSync.enabled }} 108 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 109 | {{- include "airflow.container.git_sync" . | indent 8 }} 110 | {{- end }} 111 | volumes: 112 | {{- $volumes | indent 8 }} 113 | - name: scripts 114 | secret: 115 | secretName: {{ include "airflow.fullname" . }}-db-migrations 116 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-users-job.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.users) (not .Values.airflow.usersUpdate) }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: batch/v1 10 | kind: Job 11 | metadata: 12 | name: {{ include "airflow.fullname" . 
}}-sync-users 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-users 16 | chart: {{ include "airflow.labels.chart" . }} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | annotations: 20 | helm.sh/hook: post-install,post-upgrade 21 | helm.sh/hook-weight: "0" 22 | helm.sh/hook-delete-policy: before-hook-creation 23 | {{- if .Values.airflow.sync.annotations }} 24 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 25 | {{- end }} 26 | spec: 27 | template: 28 | metadata: 29 | annotations: 30 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 31 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 32 | checksum/sync-users-script: {{ include "airflow.sync.sync_users.py" . | sha256sum }} 33 | {{- if .Values.airflow.podAnnotations }} 34 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 35 | {{- end }} 36 | {{- if .Values.airflow.sync.podAnnotations }} 37 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 38 | {{- end }} 39 | {{- if .Values.airflow.sync.safeToEvict }} 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | {{- end }} 42 | labels: 43 | app: {{ include "airflow.labels.app" . 
}} 44 | component: sync-users 45 | release: {{ .Release.Name }} 46 | {{- if .Values.airflow.sync.podLabels }} 47 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 48 | {{- end }} 49 | spec: 50 | restartPolicy: OnFailure 51 | {{- if .Values.airflow.image.pullSecret }} 52 | imagePullSecrets: 53 | - name: {{ .Values.airflow.image.pullSecret }} 54 | {{- end }} 55 | {{- if $podNodeSelector }} 56 | nodeSelector: 57 | {{- $podNodeSelector | nindent 8 }} 58 | {{- end }} 59 | {{- if $podAffinity }} 60 | affinity: 61 | {{- $podAffinity | nindent 8 }} 62 | {{- end }} 63 | {{- if $podTolerations }} 64 | tolerations: 65 | {{- $podTolerations | nindent 8 }} 66 | {{- end }} 67 | {{- if $podSecurityContext }} 68 | securityContext: 69 | {{- $podSecurityContext | nindent 8 }} 70 | {{- end }} 71 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 72 | initContainers: 73 | {{- if $extraPipPackages }} 74 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 75 | {{- end }} 76 | {{- if .Values.dags.gitSync.enabled }} 77 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 78 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 79 | {{- end }} 80 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 81 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 82 | containers: 83 | - name: sync-airflow-users 84 | {{- include "airflow.image" . | indent 10 }} 85 | resources: 86 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 87 | envFrom: 88 | {{- include "airflow.envFrom" . | indent 12 }} 89 | env: 90 | {{- include "airflow.env" . 
| indent 12 }} 91 | command: 92 | {{- include "airflow.command" . | indent 12 }} 93 | args: 94 | - "python" 95 | - "-u" 96 | - "/mnt/scripts/sync_users.py" 97 | volumeMounts: 98 | {{- $volumeMounts | indent 12 }} 99 | - name: scripts 100 | mountPath: /mnt/scripts 101 | readOnly: true 102 | {{- if .Values.airflow.usersTemplates }} 103 | - name: templates 104 | mountPath: "/mnt/templates" 105 | readOnly: true 106 | {{- end }} 107 | volumes: 108 | {{- $volumes | indent 8 }} 109 | - name: scripts 110 | secret: 111 | secretName: {{ include "airflow.fullname" . }}-sync-users 112 | {{- if .Values.airflow.usersTemplates }} 113 | - name: templates 114 | projected: 115 | sources: 116 | {{- range $k, $v := .Values.airflow.usersTemplates }} 117 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 118 | {{ required "each key in `airflow.usersTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 119 | {{- end }} 120 | {{- if eq ($v.kind | lower) "configmap" }} 121 | - configMap: 122 | name: {{ $v.name | quote }} 123 | items: 124 | - key: {{ $v.key | quote }} 125 | path: {{ $k | quote }} 126 | {{- else if eq ($v.kind | lower) "secret" }} 127 | - secret: 128 | name: {{ $v.name | quote }} 129 | items: 130 | - key: {{ $v.key | quote }} 131 | path: {{ $k | quote }} 132 | {{- else }} 133 | {{ required "each `kind` in `airflow.usersTemplates` must be one of ['configmap', 'secret']!" 
nil }} 134 | {{- end }} 135 | {{- end }} 136 | {{- end }} 137 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-variables-job.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.variables) (not .Values.airflow.variablesUpdate) }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: batch/v1 10 | kind: Job 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-sync-variables 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-variables 16 | chart: {{ include "airflow.labels.chart" . 
}} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | annotations: 20 | helm.sh/hook: post-install,post-upgrade 21 | helm.sh/hook-weight: "0" 22 | helm.sh/hook-delete-policy: before-hook-creation 23 | {{- if .Values.airflow.sync.annotations }} 24 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 25 | {{- end }} 26 | spec: 27 | template: 28 | metadata: 29 | annotations: 30 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 31 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 32 | checksum/sync-variables-script: {{ include "airflow.sync.sync_variables.py" . | sha256sum }} 33 | {{- if .Values.airflow.podAnnotations }} 34 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 35 | {{- end }} 36 | {{- if .Values.airflow.sync.podAnnotations }} 37 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 38 | {{- end }} 39 | {{- if .Values.airflow.sync.safeToEvict }} 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | {{- end }} 42 | labels: 43 | app: {{ include "airflow.labels.app" . 
}} 44 | component: sync-variables 45 | release: {{ .Release.Name }} 46 | {{- if .Values.airflow.sync.podLabels }} 47 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 48 | {{- end }} 49 | spec: 50 | restartPolicy: OnFailure 51 | {{- if .Values.airflow.image.pullSecret }} 52 | imagePullSecrets: 53 | - name: {{ .Values.airflow.image.pullSecret }} 54 | {{- end }} 55 | {{- if $podNodeSelector }} 56 | nodeSelector: 57 | {{- $podNodeSelector | nindent 8 }} 58 | {{- end }} 59 | {{- if $podAffinity }} 60 | affinity: 61 | {{- $podAffinity | nindent 8 }} 62 | {{- end }} 63 | {{- if $podTolerations }} 64 | tolerations: 65 | {{- $podTolerations | nindent 8 }} 66 | {{- end }} 67 | {{- if $podSecurityContext }} 68 | securityContext: 69 | {{- $podSecurityContext | nindent 8 }} 70 | {{- end }} 71 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 72 | initContainers: 73 | {{- if $extraPipPackages }} 74 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 75 | {{- end }} 76 | {{- if .Values.dags.gitSync.enabled }} 77 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 78 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 79 | {{- end }} 80 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 81 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 82 | containers: 83 | - name: sync-airflow-variables 84 | {{- include "airflow.image" . | indent 10 }} 85 | resources: 86 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 87 | envFrom: 88 | {{- include "airflow.envFrom" . | indent 12 }} 89 | env: 90 | {{- include "airflow.env" . 
| indent 12 }} 91 | command: 92 | {{- include "airflow.command" . | indent 12 }} 93 | args: 94 | - "python" 95 | - "-u" 96 | - "/mnt/scripts/sync_variables.py" 97 | volumeMounts: 98 | {{- $volumeMounts | indent 12 }} 99 | - name: scripts 100 | mountPath: /mnt/scripts 101 | readOnly: true 102 | {{- if .Values.airflow.variablesTemplates }} 103 | - name: templates 104 | mountPath: "/mnt/templates" 105 | readOnly: true 106 | {{- end }} 107 | volumes: 108 | {{- $volumes | indent 8 }} 109 | - name: scripts 110 | secret: 111 | secretName: {{ include "airflow.fullname" . }}-sync-variables 112 | {{- if .Values.airflow.variablesTemplates }} 113 | - name: templates 114 | projected: 115 | sources: 116 | {{- range $k, $v := .Values.airflow.variablesTemplates }} 117 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 118 | {{ required "each key in `airflow.variablesTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 119 | {{- end }} 120 | {{- if eq ($v.kind | lower) "configmap" }} 121 | - configMap: 122 | name: {{ $v.name | quote }} 123 | items: 124 | - key: {{ $v.key | quote }} 125 | path: {{ $k | quote }} 126 | {{- else if eq ($v.kind | lower) "secret" }} 127 | - secret: 128 | name: {{ $v.name | quote }} 129 | items: 130 | - key: {{ $v.key | quote }} 131 | path: {{ $k | quote }} 132 | {{- else }} 133 | {{ required "each `kind` in `airflow.variablesTemplates` must be one of ['configmap', 'secret']!" 
nil }} 134 | {{- end }} 135 | {{- end }} 136 | {{- end }} 137 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-connections-job.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.connections) (not .Values.airflow.connectionsUpdate) }} 2 | {{- /* scheduling constraints are read from airflow.sync.*, the same values used for the securityContext below and by the other sync Jobs */ -}} {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: batch/v1 10 | kind: Job 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-sync-connections 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-connections 16 | chart: {{ include "airflow.labels.chart" . 
}} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | annotations: 20 | helm.sh/hook: post-install,post-upgrade 21 | helm.sh/hook-weight: "0" 22 | helm.sh/hook-delete-policy: before-hook-creation 23 | {{- if .Values.airflow.sync.annotations }} 24 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 25 | {{- end }} 26 | spec: 27 | template: 28 | metadata: 29 | annotations: 30 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 31 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 32 | checksum/sync-connections-script: {{ include "airflow.sync.sync_connections.py" . | sha256sum }} 33 | {{- if .Values.airflow.podAnnotations }} 34 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 35 | {{- end }} 36 | {{- if .Values.airflow.sync.podAnnotations }} 37 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 38 | {{- end }} 39 | {{- if .Values.airflow.sync.safeToEvict }} 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | {{- end }} 42 | labels: 43 | app: {{ include "airflow.labels.app" . 
}} 44 | component: sync-connections 45 | release: {{ .Release.Name }} 46 | {{- if .Values.airflow.sync.podLabels }} 47 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 48 | {{- end }} 49 | spec: 50 | restartPolicy: OnFailure 51 | {{- if .Values.airflow.image.pullSecret }} 52 | imagePullSecrets: 53 | - name: {{ .Values.airflow.image.pullSecret }} 54 | {{- end }} 55 | {{- if $podNodeSelector }} 56 | nodeSelector: 57 | {{- $podNodeSelector | nindent 8 }} 58 | {{- end }} 59 | {{- if $podAffinity }} 60 | affinity: 61 | {{- $podAffinity | nindent 8 }} 62 | {{- end }} 63 | {{- if $podTolerations }} 64 | tolerations: 65 | {{- $podTolerations | nindent 8 }} 66 | {{- end }} 67 | {{- if $podSecurityContext }} 68 | securityContext: 69 | {{- $podSecurityContext | nindent 8 }} 70 | {{- end }} 71 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 72 | initContainers: 73 | {{- if $extraPipPackages }} 74 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 75 | {{- end }} 76 | {{- if .Values.dags.gitSync.enabled }} 77 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 78 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 79 | {{- end }} 80 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 81 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 82 | containers: 83 | - name: sync-airflow-connections 84 | {{- include "airflow.image" . | indent 10 }} 85 | resources: 86 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 87 | envFrom: 88 | {{- include "airflow.envFrom" . | indent 12 }} 89 | env: 90 | {{- include "airflow.env" . 
| indent 12 }} 91 | command: 92 | {{- include "airflow.command" . | indent 12 }} 93 | args: 94 | - "python" 95 | - "-u" 96 | - "/mnt/scripts/sync_connections.py" 97 | volumeMounts: 98 | {{- $volumeMounts | indent 12 }} 99 | - name: scripts 100 | mountPath: /mnt/scripts 101 | readOnly: true 102 | {{- if .Values.airflow.connectionsTemplates }} 103 | - name: templates 104 | mountPath: "/mnt/templates" 105 | readOnly: true 106 | {{- end }} 107 | volumes: 108 | {{- $volumes | indent 8 }} 109 | - name: scripts 110 | secret: 111 | secretName: {{ include "airflow.fullname" . }}-sync-connections 112 | {{- if .Values.airflow.connectionsTemplates }} 113 | - name: templates 114 | projected: 115 | sources: 116 | {{- range $k, $v := .Values.airflow.connectionsTemplates }} 117 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 118 | {{ required "each key in `airflow.connectionsTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 119 | {{- end }} 120 | {{- if eq ($v.kind | lower) "configmap" }} 121 | - configMap: 122 | name: {{ $v.name | quote }} 123 | items: 124 | - key: {{ $v.key | quote }} 125 | path: {{ $k | quote }} 126 | {{- else if eq ($v.kind | lower) "secret" }} 127 | - secret: 128 | name: {{ $v.name | quote }} 129 | items: 130 | - key: {{ $v.key | quote }} 131 | path: {{ $k | quote }} 132 | {{- else }} 133 | {{ required "each `kind` in `airflow.connectionsTemplates` must be one of ['configmap', 'secret']!" 
nil }} 134 | {{- end }} 135 | {{- end }} 136 | {{- end }} 137 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-users-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.users) (.Values.airflow.usersUpdate) }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-sync-users 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-users 16 | chart: {{ include "airflow.labels.chart" . }} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | {{- if .Values.airflow.sync.annotations }} 20 | annotations: 21 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 22 | {{- end }} 23 | spec: 24 | replicas: 1 25 | strategy: 26 | ## only 1 replica should run at a time 27 | type: Recreate 28 | selector: 29 | matchLabels: 30 | app: {{ include "airflow.labels.app" . 
}} 31 | component: sync-users 32 | release: {{ .Release.Name }} 33 | template: 34 | metadata: 35 | annotations: 36 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 37 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 38 | checksum/sync-users-script: {{ include "airflow.sync.sync_users.py" . | sha256sum }} 39 | {{- if .Values.airflow.podAnnotations }} 40 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 41 | {{- end }} 42 | {{- if .Values.airflow.sync.podAnnotations }} 43 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 44 | {{- end }} 45 | {{- if .Values.airflow.sync.safeToEvict }} 46 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 47 | {{- end }} 48 | labels: 49 | app: {{ include "airflow.labels.app" . }} 50 | component: sync-users 51 | release: {{ .Release.Name }} 52 | {{- if .Values.airflow.sync.podLabels }} 53 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 54 | {{- end }} 55 | spec: 56 | restartPolicy: Always 57 | {{- if .Values.airflow.image.pullSecret }} 58 | imagePullSecrets: 59 | - name: {{ .Values.airflow.image.pullSecret }} 60 | {{- end }} 61 | {{- if $podNodeSelector }} 62 | nodeSelector: 63 | {{- $podNodeSelector | nindent 8 }} 64 | {{- end }} 65 | {{- if $podAffinity }} 66 | affinity: 67 | {{- $podAffinity | nindent 8 }} 68 | {{- end }} 69 | {{- if $podTolerations }} 70 | tolerations: 71 | {{- $podTolerations | nindent 8 }} 72 | {{- end }} 73 | {{- if $podSecurityContext }} 74 | securityContext: 75 | {{- $podSecurityContext | nindent 8 }} 76 | {{- end }} 77 | serviceAccountName: {{ include "airflow.serviceAccountName" . 
}} 78 | initContainers: 79 | {{- if $extraPipPackages }} 80 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 81 | {{- end }} 82 | {{- if .Values.dags.gitSync.enabled }} 83 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 84 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 85 | {{- end }} 86 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 87 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 88 | containers: 89 | - name: sync-airflow-users 90 | {{- include "airflow.image" . | indent 10 }} 91 | resources: 92 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 93 | envFrom: 94 | {{- include "airflow.envFrom" . | indent 12 }} 95 | env: 96 | {{- include "airflow.env" . | indent 12 }} 97 | command: 98 | {{- include "airflow.command" . | indent 12 }} 99 | args: 100 | - "python" 101 | - "-u" 102 | - "/mnt/scripts/sync_users.py" 103 | volumeMounts: 104 | {{- $volumeMounts | indent 12 }} 105 | - name: scripts 106 | mountPath: /mnt/scripts 107 | readOnly: true 108 | {{- if .Values.airflow.usersTemplates }} 109 | - name: templates 110 | mountPath: "/mnt/templates" 111 | readOnly: true 112 | {{- end }} 113 | {{- if .Values.dags.gitSync.enabled }} 114 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 115 | {{- include "airflow.container.git_sync" . | indent 8 }} 116 | {{- end }} 117 | volumes: 118 | {{- $volumes | indent 8 }} 119 | - name: scripts 120 | secret: 121 | secretName: {{ include "airflow.fullname" . 
}}-sync-users 122 | {{- if .Values.airflow.usersTemplates }} 123 | - name: templates 124 | projected: 125 | sources: 126 | {{- range $k, $v := .Values.airflow.usersTemplates }} 127 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 128 | {{ required "each key in `airflow.usersTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 129 | {{- end }} 130 | {{- if eq ($v.kind | lower) "configmap" }} 131 | - configMap: 132 | name: {{ $v.name | quote }} 133 | items: 134 | - key: {{ $v.key | quote }} 135 | path: {{ $k | quote }} 136 | {{- else if eq ($v.kind | lower) "secret" }} 137 | - secret: 138 | name: {{ $v.name | quote }} 139 | items: 140 | - key: {{ $v.key | quote }} 141 | path: {{ $k | quote }} 142 | {{- else }} 143 | {{ required "each `kind` in `airflow.usersTemplates` must be one of ['configmap', 'secret']!" nil }} 144 | {{- end }} 145 | {{- end }} 146 | {{- end }} 147 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-variables-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.variables) (.Values.airflow.variablesUpdate) }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" 
.Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-sync-variables 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-variables 16 | chart: {{ include "airflow.labels.chart" . }} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | {{- if .Values.airflow.sync.annotations }} 20 | annotations: 21 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 22 | {{- end }} 23 | spec: 24 | replicas: 1 25 | strategy: 26 | ## only 1 replica should run at a time 27 | type: Recreate 28 | selector: 29 | matchLabels: 30 | app: {{ include "airflow.labels.app" . }} 31 | component: sync-variables 32 | release: {{ .Release.Name }} 33 | template: 34 | metadata: 35 | annotations: 36 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 37 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 38 | checksum/sync-variables-script: {{ include "airflow.sync.sync_variables.py" . | sha256sum }} 39 | {{- if .Values.airflow.podAnnotations }} 40 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 41 | {{- end }} 42 | {{- if .Values.airflow.sync.podAnnotations }} 43 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 44 | {{- end }} 45 | {{- if .Values.airflow.sync.safeToEvict }} 46 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 47 | {{- end }} 48 | labels: 49 | app: {{ include "airflow.labels.app" . 
}} 50 | component: sync-variables 51 | release: {{ .Release.Name }} 52 | {{- if .Values.airflow.sync.podLabels }} 53 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 54 | {{- end }} 55 | spec: 56 | restartPolicy: Always 57 | {{- if .Values.airflow.image.pullSecret }} 58 | imagePullSecrets: 59 | - name: {{ .Values.airflow.image.pullSecret }} 60 | {{- end }} 61 | {{- if $podNodeSelector }} 62 | nodeSelector: 63 | {{- $podNodeSelector | nindent 8 }} 64 | {{- end }} 65 | {{- if $podAffinity }} 66 | affinity: 67 | {{- $podAffinity | nindent 8 }} 68 | {{- end }} 69 | {{- if $podTolerations }} 70 | tolerations: 71 | {{- $podTolerations | nindent 8 }} 72 | {{- end }} 73 | {{- if $podSecurityContext }} 74 | securityContext: 75 | {{- $podSecurityContext | nindent 8 }} 76 | {{- end }} 77 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 78 | initContainers: 79 | {{- if $extraPipPackages }} 80 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 81 | {{- end }} 82 | {{- if .Values.dags.gitSync.enabled }} 83 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 84 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 85 | {{- end }} 86 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 87 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 88 | containers: 89 | - name: sync-airflow-variables 90 | {{- include "airflow.image" . | indent 10 }} 91 | resources: 92 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 93 | envFrom: 94 | {{- include "airflow.envFrom" . | indent 12 }} 95 | env: 96 | {{- include "airflow.env" . 
| indent 12 }} 97 | command: 98 | {{- include "airflow.command" . | indent 12 }} 99 | args: 100 | - "python" 101 | - "-u" 102 | - "/mnt/scripts/sync_variables.py" 103 | volumeMounts: 104 | {{- $volumeMounts | indent 12 }} 105 | - name: scripts 106 | mountPath: /mnt/scripts 107 | readOnly: true 108 | {{- if .Values.airflow.variablesTemplates }} 109 | - name: templates 110 | mountPath: "/mnt/templates" 111 | readOnly: true 112 | {{- end }} 113 | {{- if .Values.dags.gitSync.enabled }} 114 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 115 | {{- include "airflow.container.git_sync" . | indent 8 }} 116 | {{- end }} 117 | volumes: 118 | {{- $volumes | indent 8 }} 119 | - name: scripts 120 | secret: 121 | secretName: {{ include "airflow.fullname" . }}-sync-variables 122 | {{- if .Values.airflow.variablesTemplates }} 123 | - name: templates 124 | projected: 125 | sources: 126 | {{- range $k, $v := .Values.airflow.variablesTemplates }} 127 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 128 | {{ required "each key in `airflow.variablesTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 129 | {{- end }} 130 | {{- if eq ($v.kind | lower) "configmap" }} 131 | - configMap: 132 | name: {{ $v.name | quote }} 133 | items: 134 | - key: {{ $v.key | quote }} 135 | path: {{ $k | quote }} 136 | {{- else if eq ($v.kind | lower) "secret" }} 137 | - secret: 138 | name: {{ $v.name | quote }} 139 | items: 140 | - key: {{ $v.key | quote }} 141 | path: {{ $k | quote }} 142 | {{- else }} 143 | {{ required "each `kind` in `airflow.variablesTemplates` must be one of ['configmap', 'secret']!" 
nil }} 144 | {{- end }} 145 | {{- end }} 146 | {{- end }} 147 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/sync-connections-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.airflow.connections) (.Values.airflow.connectionsUpdate) }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.airflow.sync.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.airflow.sync.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.airflow.sync.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.airflow.sync.securityContext) }} 6 | {{- $extraPipPackages := .Values.airflow.extraPipPackages }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 8 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) }} 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | metadata: 12 | name: {{ include "airflow.fullname" . }}-sync-connections 13 | labels: 14 | app: {{ include "airflow.labels.app" . }} 15 | component: sync-connections 16 | chart: {{ include "airflow.labels.chart" . 
}} 17 | release: {{ .Release.Name }} 18 | heritage: {{ .Release.Service }} 19 | {{- if .Values.airflow.sync.annotations }} 20 | annotations: 21 | {{- toYaml .Values.airflow.sync.annotations | nindent 4 }} 22 | {{- end }} 23 | spec: 24 | replicas: 1 25 | strategy: 26 | ## only 1 replica should run at a time 27 | type: Recreate 28 | selector: 29 | matchLabels: 30 | app: {{ include "airflow.labels.app" . }} 31 | component: sync-connections 32 | release: {{ .Release.Name }} 33 | template: 34 | metadata: 35 | annotations: 36 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 37 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 38 | checksum/sync-connections-script: {{ include "airflow.sync.sync_connections.py" . | sha256sum }} 39 | {{- if .Values.airflow.podAnnotations }} 40 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 41 | {{- end }} 42 | {{- if .Values.airflow.sync.podAnnotations }} 43 | {{- toYaml .Values.airflow.sync.podAnnotations | nindent 8 }} 44 | {{- end }} 45 | {{- if .Values.airflow.sync.safeToEvict }} 46 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 47 | {{- end }} 48 | labels: 49 | app: {{ include "airflow.labels.app" . 
}} 50 | component: sync-connections 51 | release: {{ .Release.Name }} 52 | {{- if .Values.airflow.sync.podLabels }} 53 | {{- toYaml .Values.airflow.sync.podLabels | nindent 8 }} 54 | {{- end }} 55 | spec: 56 | restartPolicy: Always 57 | {{- if .Values.airflow.image.pullSecret }} 58 | imagePullSecrets: 59 | - name: {{ .Values.airflow.image.pullSecret }} 60 | {{- end }} 61 | {{- if $podNodeSelector }} 62 | nodeSelector: 63 | {{- $podNodeSelector | nindent 8 }} 64 | {{- end }} 65 | {{- if $podAffinity }} 66 | affinity: 67 | {{- $podAffinity | nindent 8 }} 68 | {{- end }} 69 | {{- if $podTolerations }} 70 | tolerations: 71 | {{- $podTolerations | nindent 8 }} 72 | {{- end }} 73 | {{- if $podSecurityContext }} 74 | securityContext: 75 | {{- $podSecurityContext | nindent 8 }} 76 | {{- end }} 77 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 78 | initContainers: 79 | {{- if $extraPipPackages }} 80 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 81 | {{- end }} 82 | {{- if .Values.dags.gitSync.enabled }} 83 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 84 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 85 | {{- end }} 86 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 87 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 88 | containers: 89 | - name: sync-airflow-connections 90 | {{- include "airflow.image" . | indent 10 }} 91 | resources: 92 | {{- toYaml .Values.airflow.sync.resources | nindent 12 }} 93 | envFrom: 94 | {{- include "airflow.envFrom" . | indent 12 }} 95 | env: 96 | {{- include "airflow.env" . 
| indent 12 }} 97 | command: 98 | {{- include "airflow.command" . | indent 12 }} 99 | args: 100 | - "python" 101 | - "-u" 102 | - "/mnt/scripts/sync_connections.py" 103 | volumeMounts: 104 | {{- $volumeMounts | indent 12 }} 105 | - name: scripts 106 | mountPath: /mnt/scripts 107 | readOnly: true 108 | {{- if .Values.airflow.connectionsTemplates }} 109 | - name: templates 110 | mountPath: "/mnt/templates" 111 | readOnly: true 112 | {{- end }} 113 | {{- if .Values.dags.gitSync.enabled }} 114 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 115 | {{- include "airflow.container.git_sync" . | indent 8 }} 116 | {{- end }} 117 | volumes: 118 | {{- $volumes | indent 8 }} 119 | - name: scripts 120 | secret: 121 | secretName: {{ include "airflow.fullname" . }}-sync-connections 122 | {{- if .Values.airflow.connectionsTemplates }} 123 | - name: templates 124 | projected: 125 | sources: 126 | {{- range $k, $v := .Values.airflow.connectionsTemplates }} 127 | {{- if not (regexMatch "^[a-zA-Z_][a-zA-Z0-9_]*$" $k) }} 128 | {{ required "each key in `airflow.connectionsTemplates` must match the regex: ^[a-zA-Z_][a-zA-Z0-9_]*$" nil }} 129 | {{- end }} 130 | {{- if eq ($v.kind | lower) "configmap" }} 131 | - configMap: 132 | name: {{ $v.name | quote }} 133 | items: 134 | - key: {{ $v.key | quote }} 135 | path: {{ $k | quote }} 136 | {{- else if eq ($v.kind | lower) "secret" }} 137 | - secret: 138 | name: {{ $v.name | quote }} 139 | items: 140 | - key: {{ $v.key | quote }} 141 | path: {{ $k | quote }} 142 | {{- else }} 143 | {{ required "each `kind` in `airflow.connectionsTemplates` must be one of ['configmap', 'secret']!" 
nil }} 144 | {{- end }} 145 | {{- end }} 146 | {{- end }} 147 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/_helpers/sync_users.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | The python sync script for users. 3 | */}} 4 | {{- define "airflow.sync.sync_users.py" }} 5 | ############################ 6 | #### BEGIN: GLOBAL CODE #### 7 | ############################ 8 | {{- include "airflow.sync.global_code" . }} 9 | ########################## 10 | #### END: GLOBAL CODE #### 11 | ########################## 12 | 13 | 14 | ############# 15 | ## Imports ## 16 | ############# 17 | import sys 18 | from flask_appbuilder.security.sqla.models import User, Role 19 | from werkzeug.security import check_password_hash, generate_password_hash 20 | {{- if .Values.airflow.legacyCommands }} 21 | import airflow.www_rbac.app as www_app 22 | flask_app, flask_appbuilder = www_app.create_app() 23 | {{- else }} 24 | import airflow.www.app as www_app 25 | flask_app = www_app.create_app() 26 | flask_appbuilder = flask_app.appbuilder 27 | {{- end }} 28 | 29 | 30 | ############# 31 | ## Classes ## 32 | ############# 33 | class UserWrapper(object): 34 | def __init__( 35 | self, 36 | username: str, 37 | first_name: Optional[str] = None, 38 | last_name: Optional[str] = None, 39 | email: Optional[str] = None, 40 | role: Optional[str] = None, 41 | password: Optional[str] = None 42 | ): 43 | self.username = username 44 | self._first_name = first_name 45 | self._last_name = last_name 46 | self._email = email 47 | self.role = role 48 | self._password = password 49 | 50 | @property 51 | def first_name(self) -> str: 52 | return string_substitution(self._first_name, VAR__TEMPLATE_VALUE_CACHE) 53 | 54 | @property 55 | def last_name(self) -> str: 56 | return string_substitution(self._last_name, VAR__TEMPLATE_VALUE_CACHE) 57 | 58 | @property 59 | def email(self) -> str: 60 | 
return string_substitution(self._email, VAR__TEMPLATE_VALUE_CACHE) 61 | 62 | @property 63 | def password(self) -> str: 64 | return string_substitution(self._password, VAR__TEMPLATE_VALUE_CACHE) 65 | 66 | def as_dict(self) -> Dict[str, str]: 67 | return { 68 | "username": self.username, 69 | "first_name": self.first_name, 70 | "last_name": self.last_name, 71 | "email": self.email, 72 | "role": find_role(role_name=self.role), 73 | "password": self.password 74 | } 75 | 76 | 77 | ############### 78 | ## Variables ## 79 | ############### 80 | VAR__TEMPLATE_NAMES = [ 81 | {{- range $k, $v := .Values.airflow.usersTemplates }} 82 | {{ $k | quote }}, 83 | {{- end }} 84 | ] 85 | VAR__TEMPLATE_MTIME_CACHE = {} 86 | VAR__TEMPLATE_VALUE_CACHE = {} 87 | VAR__USER_WRAPPERS = { 88 | {{- range .Values.airflow.users }} 89 | {{ .username | quote }}: UserWrapper( 90 | username={{ (required "each `username` in `airflow.users` must be non-empty!" .username) | quote }}, 91 | first_name={{ (required "each `firstName` in `airflow.users` must be non-empty!" .firstName) | quote }}, 92 | last_name={{ (required "each `lastName` in `airflow.users` must be non-empty!" .lastName) | quote }}, 93 | email={{ (required "each `email` in `airflow.users` must be non-empty!" .email) | quote }}, 94 | role={{ (required "each `role` in `airflow.users` must be non-empty!" .role) | quote }}, 95 | password={{ (required "each `password` in `airflow.users` must be non-empty!" .password) | quote }}, 96 | ), 97 | {{- end }} 98 | } 99 | 100 | 101 | ############### 102 | ## Functions ## 103 | ############### 104 | def find_role(role_name: str) -> Role: 105 | """ 106 | Get the FAB Role model associated with a `role_name`. 
107 | """ 108 | found_role = flask_appbuilder.sm.find_role(role_name) 109 | if found_role: 110 | return found_role 111 | else: 112 | valid_roles = flask_appbuilder.sm.get_all_roles() 113 | logging.error(f"Failed to find role=`{role_name}`, valid roles are: {valid_roles}") 114 | sys.exit(1) 115 | 116 | 117 | def compare_users(user_dict: Dict, user_model: User) -> bool: 118 | """ 119 | Check if user info (stored in dict) is identical to a FAB User model. 120 | """ 121 | return ( 122 | user_dict["username"] == user_model.username 123 | and user_dict["first_name"] == user_model.first_name 124 | and user_dict["last_name"] == user_model.last_name 125 | and user_dict["email"] == user_model.email 126 | and [user_dict["role"]] == user_model.roles 127 | and check_password_hash(pwhash=user_model.password, password=user_dict["password"]) 128 | ) 129 | 130 | 131 | def sync_user(user_wrapper: UserWrapper) -> None: 132 | """ 133 | Sync the User defined by a provided UserWrapper into the FAB DB. 134 | """ 135 | username = user_wrapper.username 136 | u_new = user_wrapper.as_dict() 137 | u_old = flask_appbuilder.sm.find_user(username=username) 138 | 139 | if not u_old: 140 | logging.info(f"User=`{username}` is missing, adding...") 141 | if flask_appbuilder.sm.add_user( 142 | username=u_new["username"], 143 | first_name=u_new["first_name"], 144 | last_name=u_new["last_name"], 145 | email=u_new["email"], 146 | role=u_new["role"], 147 | password=u_new["password"] 148 | ): 149 | logging.info(f"User=`{username}` was successfully added.") 150 | else: 151 | logging.error(f"Failed to add User=`{username}`") 152 | sys.exit(1) 153 | else: 154 | if compare_users(u_new, u_old): 155 | pass 156 | else: 157 | logging.info(f"User=`{username}` exists but has changed, updating...") 158 | u_old.first_name = u_new["first_name"] 159 | u_old.last_name = u_new["last_name"] 160 | u_old.email = u_new["email"] 161 | u_old.roles = [u_new["role"]] 162 | u_old.password = 
generate_password_hash(u_new["password"]) 163 | # strange check for False is because update_user() returns None for success 164 | # but in future might return the User model 165 | if not (flask_appbuilder.sm.update_user(u_old) is False): 166 | logging.info(f"User=`{username}` was successfully updated.") 167 | else: 168 | logging.error(f"Failed to update User=`{username}`") 169 | sys.exit(1) 170 | 171 | 172 | def sync_all_users(user_wrappers: Dict[str, UserWrapper]) -> None: 173 | """ 174 | Sync all users in provided `user_wrappers`. 175 | """ 176 | logging.info("BEGIN: airflow users sync") 177 | for user_wrapper in user_wrappers.values(): 178 | sync_user(user_wrapper) 179 | logging.info("END: airflow users sync") 180 | 181 | # ensures that any SQLAlchemy sessions are closed (so we don't hold a connection to the database) 182 | flask_app.do_teardown_appcontext() 183 | 184 | 185 | def sync_with_airflow() -> None: 186 | """ 187 | Perform a sync of all objects with airflow (note, `sync_with_airflow()` is called in `main()` template). 188 | """ 189 | sync_all_users(user_wrappers=VAR__USER_WRAPPERS) 190 | 191 | 192 | ############## 193 | ## Run Main ## 194 | ############## 195 | {{- if .Values.airflow.usersUpdate }} 196 | main(sync_forever=True) 197 | {{- else }} 198 | main(sync_forever=False) 199 | {{- end }} 200 | 201 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/sync/_helpers/sync_connections.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | The python sync script for connections. 3 | */}} 4 | {{- define "airflow.sync.sync_connections.py" }} 5 | ############################ 6 | #### BEGIN: GLOBAL CODE #### 7 | ############################ 8 | {{- include "airflow.sync.global_code" .
}} 9 | ########################## 10 | #### END: GLOBAL CODE #### 11 | ########################## 12 | 13 | 14 | ############# 15 | ## Imports ## 16 | ############# 17 | from airflow.models import Connection 18 | from airflow.utils.db import create_session 19 | 20 | 21 | ############# 22 | ## Classes ## 23 | ############# 24 | class ConnectionWrapper(object): 25 | def __init__( 26 | self, 27 | conn_id: str, 28 | conn_type: str, 29 | {{- if not .Values.airflow.legacyCommands }} 30 | description: Optional[str] = None, 31 | {{- end }} 32 | host: Optional[str] = None, 33 | login: Optional[str] = None, 34 | password: Optional[str] = None, 35 | schema: Optional[str] = None, 36 | port: Optional[int] = None, 37 | extra: Optional[str] = None, 38 | ): 39 | self.conn_id = conn_id 40 | self.conn_type = conn_type 41 | {{- if not .Values.airflow.legacyCommands }} 42 | self.description = description 43 | {{- end }} 44 | self._host = host 45 | self._login = login 46 | self._password = password 47 | self._schema = schema 48 | self.port = port 49 | self._extra = extra 50 | 51 | @property 52 | def host(self) -> str: 53 | return string_substitution(self._host, VAR__TEMPLATE_VALUE_CACHE) 54 | 55 | @property 56 | def login(self) -> str: 57 | return string_substitution(self._login, VAR__TEMPLATE_VALUE_CACHE) 58 | 59 | @property 60 | def password(self) -> str: 61 | return string_substitution(self._password, VAR__TEMPLATE_VALUE_CACHE) 62 | 63 | @property 64 | def schema(self) -> str: 65 | return string_substitution(self._schema, VAR__TEMPLATE_VALUE_CACHE) 66 | 67 | @property 68 | def extra(self) -> str: 69 | return string_substitution(self._extra, VAR__TEMPLATE_VALUE_CACHE) 70 | 71 | def as_connection(self) -> Connection: 72 | return Connection( 73 | conn_id=self.conn_id, 74 | conn_type=self.conn_type, 75 | {{- if not .Values.airflow.legacyCommands }} 76 | description=self.description, 77 | {{- end }} 78 | host=self.host, 79 | login=self.login, 80 | password=self.password, 81 | 
schema=self.schema, 82 | port=self.port, 83 | extra=self.extra, 84 | ) 85 | 86 | 87 | ############### 88 | ## Variables ## 89 | ############### 90 | VAR__TEMPLATE_NAMES = [ 91 | {{- range $k, $v := .Values.airflow.connectionsTemplates }} 92 | {{ $k | quote }}, 93 | {{- end }} 94 | ] 95 | VAR__TEMPLATE_MTIME_CACHE = {} 96 | VAR__TEMPLATE_VALUE_CACHE = {} 97 | VAR__CONNECTION_WRAPPERS = { 98 | {{- range .Values.airflow.connections }} 99 | {{ .id | quote }}: ConnectionWrapper( 100 | conn_id={{ (required "each `id` in `airflow.connections` must be non-empty!" .id) | quote }}, 101 | conn_type={{ (required "each `type` in `airflow.connections` must be non-empty!" .type) | quote }}, 102 | {{- if and (.description) (not $.Values.airflow.legacyCommands) }} 103 | description={{ .description | quote }}, 104 | {{- end }} 105 | {{- if .host }} 106 | host={{ .host | quote }}, 107 | {{- end }} 108 | {{- if .login }} 109 | login={{ .login | quote }}, 110 | {{- end }} 111 | {{- if .password }} 112 | password={{ .password | quote }}, 113 | {{- end }} 114 | {{- if .schema }} 115 | schema={{ .schema | quote }}, 116 | {{- end }} 117 | {{- if .port }} 118 | {{- if not (or (typeIs "float64" .port) (typeIs "int64" .port)) }} 119 | {{- /* the type of a number could be float64 or int64 depending on how it was set */ -}} 120 | {{ required "each `port` in `airflow.connections` must be int-type!" nil }} 121 | {{- end }} 122 | port={{ .port }}, 123 | {{- end }} 124 | {{- if .extra }} 125 | extra={{ .extra | quote }}, 126 | {{- end }} 127 | ), 128 | {{- end }} 129 | } 130 | 131 | 132 | ############### 133 | ## Functions ## 134 | ############### 135 | def compare_connections(c1: Connection, c2: Connection) -> bool: 136 | """ 137 | Check if two Connection objects are identical. 
138 | """ 139 | return ( 140 | c1.conn_id == c2.conn_id 141 | and c1.conn_type == c2.conn_type 142 | {{- if not .Values.airflow.legacyCommands }} 143 | and c1.description == c2.description 144 | {{- end }} 145 | and c1.host == c2.host 146 | and c1.login == c2.login 147 | and c1.password == c2.password 148 | and c1.schema == c2.schema 149 | and c1.port == c2.port 150 | and c1.extra == c2.extra 151 | ) 152 | 153 | 154 | def sync_connection(connection_wrapper: ConnectionWrapper) -> None: 155 | """ 156 | Sync the Connection defined by a provided ConnectionWrapper into the airflow DB. 157 | """ 158 | c_id = connection_wrapper.conn_id 159 | c_new = connection_wrapper.as_connection() 160 | 161 | connection_added = False 162 | connection_updated = False 163 | 164 | with create_session() as session: 165 | c_old = session.query(Connection).filter(Connection.conn_id == c_id).first() 166 | if not c_old: 167 | logging.info(f"Connection=`{c_id}` is missing, adding...") 168 | session.add(c_new) 169 | connection_added = True 170 | else: 171 | if compare_connections(c_new, c_old): 172 | pass 173 | else: 174 | logging.info(f"Connection=`{c_id}` exists but has changed, updating...") 175 | c_old.conn_type = c_new.conn_type 176 | {{- if not .Values.airflow.legacyCommands }} 177 | c_old.description = c_new.description 178 | {{- end }} 179 | c_old.host = c_new.host 180 | c_old.login = c_new.login 181 | c_old.password = c_new.password 182 | c_old.schema = c_new.schema 183 | c_old.port = c_new.port 184 | c_old.extra = c_new.extra 185 | connection_updated = True 186 | 187 | if connection_added: 188 | logging.info(f"Connection=`{c_id}` was successfully added.") 189 | if connection_updated: 190 | logging.info(f"Connection=`{c_id}` was successfully updated.") 191 | 192 | 193 | def sync_all_connections(connection_wrappers: Dict[str, ConnectionWrapper]) -> None: 194 | """ 195 | Sync all connections in provided `connection_wrappers`. 
196 | """ 197 | logging.info("BEGIN: airflow connections sync") 198 | for connection_wrapper in connection_wrappers.values(): 199 | sync_connection(connection_wrapper) 200 | logging.info("END: airflow connections sync") 201 | 202 | 203 | def sync_with_airflow() -> None: 204 | """ 205 | Perform a sync of all objects with airflow (note, `sync_with_airflow()` is called in `main()` template). 206 | """ 207 | sync_all_connections(connection_wrappers=VAR__CONNECTION_WRAPPERS) 208 | 209 | 210 | ############## 211 | ## Run Main ## 212 | ############## 213 | {{- if .Values.airflow.connectionsUpdate }} 214 | main(sync_forever=True) 215 | {{- else }} 216 | main(sync_forever=False) 217 | {{- end }} 218 | 219 | {{- end }} -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/webserver/webserver-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.web.nodeSelector) }} 2 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.web.affinity) }} 3 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.web.tolerations) }} 4 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.web.securityContext) }} 5 | {{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.web.extraPipPackages }} 6 | {{- $extraVolumeMounts := .Values.web.extraVolumeMounts }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} 8 | {{- $extraVolumes := .Values.web.extraVolumes }} 9 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release
"Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | metadata: 13 | name: {{ include "airflow.fullname" . }}-web 14 | {{- if .Values.web.annotations }} 15 | annotations: 16 | {{- toYaml .Values.web.annotations | nindent 4 }} 17 | {{- end }} 18 | labels: 19 | app: {{ include "airflow.labels.app" . }} 20 | component: web 21 | chart: {{ include "airflow.labels.chart" . }} 22 | release: {{ .Release.Name }} 23 | heritage: {{ .Release.Service }} 24 | {{- if .Values.web.labels }} 25 | {{- toYaml .Values.web.labels | nindent 4 }} 26 | {{- end }} 27 | spec: 28 | replicas: {{ .Values.web.replicas }} 29 | strategy: 30 | type: RollingUpdate 31 | rollingUpdate: 32 | ## multiple web pods can safely run concurrently 33 | maxSurge: 25% 34 | maxUnavailable: 0 35 | selector: 36 | matchLabels: 37 | app: {{ include "airflow.labels.app" . }} 38 | component: web 39 | release: {{ .Release.Name }} 40 | template: 41 | metadata: 42 | annotations: 43 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 44 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 45 | checksum/config-webserver-config: {{ include (print $.Template.BasePath "/config/secret-webserver-config.yaml") . | sha256sum }} 46 | {{- if .Values.airflow.podAnnotations }} 47 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 48 | {{- end }} 49 | {{- if .Values.web.podAnnotations }} 50 | {{- toYaml .Values.web.podAnnotations | nindent 8 }} 51 | {{- end }} 52 | {{- if .Values.web.safeToEvict }} 53 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 54 | {{- end }} 55 | labels: 56 | app: {{ include "airflow.labels.app" . 
}} 57 | component: web 58 | release: {{ .Release.Name }} 59 | {{- if .Values.web.podLabels }} 60 | {{- toYaml .Values.web.podLabels | nindent 8 }} 61 | {{- end }} 62 | spec: 63 | restartPolicy: Always 64 | {{- if .Values.airflow.image.pullSecret }} 65 | imagePullSecrets: 66 | - name: {{ .Values.airflow.image.pullSecret }} 67 | {{- end }} 68 | {{- if $podNodeSelector }} 69 | nodeSelector: 70 | {{- $podNodeSelector | nindent 8 }} 71 | {{- end }} 72 | {{- if $podAffinity }} 73 | affinity: 74 | {{- $podAffinity | nindent 8 }} 75 | {{- end }} 76 | {{- if $podTolerations }} 77 | tolerations: 78 | {{- $podTolerations | nindent 8 }} 79 | {{- end }} 80 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 81 | {{- if $podSecurityContext }} 82 | securityContext: 83 | {{- $podSecurityContext | nindent 8 }} 84 | {{- end }} 85 | initContainers: 86 | {{- if $extraPipPackages }} 87 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 88 | {{- end }} 89 | {{- if .Values.dags.gitSync.enabled }} 90 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 91 | {{- end }} 92 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 93 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 94 | containers: 95 | - name: airflow-web 96 | {{- include "airflow.image" . | indent 10 }} 97 | resources: 98 | {{- toYaml .Values.web.resources | nindent 12 }} 99 | ports: 100 | - name: web 101 | containerPort: 8080 102 | protocol: TCP 103 | envFrom: 104 | {{- include "airflow.envFrom" . | indent 12 }} 105 | env: 106 | {{- include "airflow.env" . | indent 12 }} 107 | command: 108 | {{- include "airflow.command" . 
| indent 12 }} 109 | args: 110 | - "bash" 111 | - "-c" 112 | - "exec airflow webserver" 113 | {{- if .Values.web.livenessProbe.enabled }} 114 | livenessProbe: 115 | initialDelaySeconds: {{ .Values.web.livenessProbe.initialDelaySeconds }} 116 | periodSeconds: {{ .Values.web.livenessProbe.periodSeconds }} 117 | timeoutSeconds: {{ .Values.web.livenessProbe.timeoutSeconds }} 118 | failureThreshold: {{ .Values.web.livenessProbe.failureThreshold }} 119 | httpGet: 120 | scheme: {{ include "airflow.web.scheme" . }} 121 | {{- $airflowUrl := .Values.airflow.config.AIRFLOW__WEBSERVER__BASE_URL | default "" | printf "%s/health" | urlParse }} 122 | path: {{ get $airflowUrl "path" }} 123 | port: web 124 | {{- end }} 125 | {{- if .Values.web.readinessProbe.enabled }} 126 | readinessProbe: 127 | initialDelaySeconds: {{ .Values.web.readinessProbe.initialDelaySeconds }} 128 | periodSeconds: {{ .Values.web.readinessProbe.periodSeconds }} 129 | timeoutSeconds: {{ .Values.web.readinessProbe.timeoutSeconds }} 130 | failureThreshold: {{ .Values.web.readinessProbe.failureThreshold }} 131 | httpGet: 132 | scheme: {{ include "airflow.web.scheme" . }} 133 | {{- $airflowUrl := .Values.airflow.config.AIRFLOW__WEBSERVER__BASE_URL | default "" | printf "%s/health" | urlParse }} 134 | path: {{ get $airflowUrl "path" }} 135 | port: web 136 | {{- end }} 137 | volumeMounts: 138 | {{- $volumeMounts | indent 12 }} 139 | - name: webserver-config 140 | mountPath: /opt/airflow/webserver_config.py 141 | subPath: webserver_config.py 142 | readOnly: true 143 | {{- if .Values.dags.gitSync.enabled }} 144 | {{- include "airflow.container.git_sync" . 
| indent 8 }} 145 | {{- end }} 146 | {{- if .Values.airflow.extraContainers }} 147 | {{- toYaml .Values.airflow.extraContainers | nindent 8 }} 148 | {{- end }} 149 | volumes: 150 | {{- $volumes | indent 8 }} 151 | - name: webserver-config 152 | secret: 153 | {{- if .Values.web.webserverConfig.existingSecret }} 154 | secretName: {{ .Values.web.webserverConfig.existingSecret }} 155 | {{- else }} 156 | secretName: {{ include "airflow.fullname" . }}-webserver-config 157 | {{- end }} 158 | defaultMode: 0644 -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/flower/flower-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.flower.enabled }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.flower.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.flower.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.flower.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.flower.securityContext) }} 6 | {{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.flower.extraPipPackages }} 7 | {{- $extraVolumeMounts := .Values.flower.extraVolumeMounts }} 8 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} 9 | {{- $extraVolumes := .Values.flower.extraVolumes }} 10 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | metadata: 14 | name: 
{{ include "airflow.fullname" . }}-flower 15 | {{- if .Values.flower.annotations }} 16 | annotations: 17 | {{- toYaml .Values.flower.annotations | nindent 4 }} 18 | {{- end }} 19 | labels: 20 | app: {{ include "airflow.labels.app" . }} 21 | component: flower 22 | chart: {{ include "airflow.labels.chart" . }} 23 | release: {{ .Release.Name }} 24 | heritage: {{ .Release.Service }} 25 | {{- if .Values.flower.labels }} 26 | {{- toYaml .Values.flower.labels | nindent 4 }} 27 | {{- end }} 28 | spec: 29 | replicas: {{ .Values.flower.replicas }} 30 | strategy: 31 | type: RollingUpdate 32 | rollingUpdate: 33 | ## multiple flower pods can safely run concurrently 34 | maxSurge: 25% 35 | maxUnavailable: 0 36 | selector: 37 | matchLabels: 38 | app: {{ include "airflow.labels.app" . }} 39 | component: flower 40 | release: {{ .Release.Name }} 41 | template: 42 | metadata: 43 | annotations: 44 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 45 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 46 | {{- if .Values.airflow.podAnnotations }} 47 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 48 | {{- end }} 49 | {{- if .Values.flower.podAnnotations }} 50 | {{- toYaml .Values.flower.podAnnotations | nindent 8 }} 51 | {{- end }} 52 | {{- if .Values.flower.safeToEvict }} 53 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 54 | {{- end }} 55 | labels: 56 | app: {{ include "airflow.labels.app" . 
}} 57 | component: flower 58 | release: {{ .Release.Name }} 59 | {{- if .Values.flower.podLabels }} 60 | {{- toYaml .Values.flower.podLabels | nindent 8 }} 61 | {{- end }} 62 | spec: 63 | restartPolicy: Always 64 | {{- if .Values.airflow.image.pullSecret }} 65 | imagePullSecrets: 66 | - name: {{ .Values.airflow.image.pullSecret }} 67 | {{- end }} 68 | {{- if $podNodeSelector }} 69 | nodeSelector: 70 | {{- $podNodeSelector | nindent 8 }} 71 | {{- end }} 72 | {{- if $podAffinity }} 73 | affinity: 74 | {{- $podAffinity | nindent 8 }} 75 | {{- end }} 76 | {{- if $podTolerations }} 77 | tolerations: 78 | {{- $podTolerations | nindent 8 }} 79 | {{- end }} 80 | {{- if $podSecurityContext }} 81 | securityContext: 82 | {{- $podSecurityContext | nindent 8 }} 83 | {{- end }} 84 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 85 | initContainers: 86 | {{- if $extraPipPackages }} 87 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 88 | {{- end }} 89 | {{- if .Values.dags.gitSync.enabled }} 90 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 91 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 92 | {{- end }} 93 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 94 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 95 | containers: 96 | - name: airflow-flower 97 | {{- include "airflow.image" . | indent 10 }} 98 | resources: 99 | {{- toYaml .Values.flower.resources | nindent 12 }} 100 | envFrom: 101 | {{- include "airflow.envFrom" . | indent 12 }} 102 | env: 103 | {{- include "airflow.env" . 
| indent 12 }} 104 | ports: 105 | - name: flower 106 | containerPort: 5555 107 | protocol: TCP 108 | command: 109 | {{- include "airflow.command" . | indent 12 }} 110 | args: 111 | - "bash" 112 | - "-c" 113 | {{- if .Values.airflow.legacyCommands }} 114 | - "exec airflow flower" 115 | {{- else }} 116 | - "exec airflow celery flower" 117 | {{- end }} 118 | {{- if .Values.flower.readinessProbe.enabled }} 119 | readinessProbe: 120 | initialDelaySeconds: {{ .Values.flower.readinessProbe.initialDelaySeconds }} 121 | periodSeconds: {{ .Values.flower.readinessProbe.periodSeconds }} 122 | timeoutSeconds: {{ .Values.flower.readinessProbe.timeoutSeconds }} 123 | failureThreshold: {{ .Values.flower.readinessProbe.failureThreshold }} 124 | exec: ## the basic-auth value is double-quoted for the shell so curl receives it as a single argument 125 | command: 126 | - "bash" 127 | - "-c" 128 | {{- if or (.Values.flower.basicAuthSecret) (.Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }} 129 | - "exec curl --user \"$AIRFLOW__CELERY__FLOWER_BASIC_AUTH\" 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" 130 | {{- else }} 131 | - "exec curl 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" 132 | {{- end }} 133 | {{- end }} 134 | {{- if .Values.flower.livenessProbe.enabled }} 135 | livenessProbe: 136 | initialDelaySeconds: {{ .Values.flower.livenessProbe.initialDelaySeconds }} 137 | periodSeconds: {{ .Values.flower.livenessProbe.periodSeconds }} 138 | timeoutSeconds: {{ .Values.flower.livenessProbe.timeoutSeconds }} 139 | failureThreshold: {{ .Values.flower.livenessProbe.failureThreshold }} 140 | exec: ## the basic-auth value is double-quoted for the shell so curl receives it as a single argument 141 | command: 142 | - "bash" 143 | - "-c" 144 | {{- if or (.Values.flower.basicAuthSecret) (.Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }} 145 | - "exec curl --user \"$AIRFLOW__CELERY__FLOWER_BASIC_AUTH\" 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" 146 | {{- else }} 147 | - "exec curl 'http://localhost:5555{{ 
.Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" 148 | {{- end }} 149 | {{- end }} 150 | {{- if $volumeMounts }} 151 | volumeMounts: 152 | {{- $volumeMounts | indent 12 }} 153 | {{- end }} 154 | {{- if .Values.dags.gitSync.enabled }} 155 | ## git-sync is included so "airflow plugins" & "python packages" can be stored in the dags repo 156 | {{- include "airflow.container.git_sync" . | indent 8 }} 157 | {{- end }} 158 | {{- if .Values.airflow.extraContainers }} 159 | {{- toYaml .Values.airflow.extraContainers | nindent 8 }} 160 | {{- end }} 161 | {{- if $volumes }} 162 | volumes: 163 | {{- $volumes | indent 8 }} 164 | {{- end }} 165 | {{- end }} 166 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/worker/worker-statefulset.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.workers.enabled }} 2 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.workers.nodeSelector) }} 3 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.workers.affinity) }} 4 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.workers.tolerations) }} 5 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.workers.securityContext) }} 6 | {{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.workers.extraPipPackages }} 7 | {{- $extraVolumeMounts := .Values.workers.extraVolumeMounts }} 8 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} 9 | {{- $extraVolumes := .Values.workers.extraVolumes }} 10 | {{- $volumes := include "airflow.volumes" (dict 
"Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} 11 | apiVersion: apps/v1 12 | ## StatefulSet gives workers consistent DNS names, allowing webserver access to log files 13 | kind: StatefulSet 14 | metadata: 15 | name: {{ include "airflow.fullname" . }}-worker 16 | {{- if .Values.workers.annotations }} 17 | annotations: 18 | {{- toYaml .Values.workers.annotations | nindent 4 }} 19 | {{- end }} 20 | labels: 21 | app: {{ include "airflow.labels.app" . }} 22 | component: worker 23 | chart: {{ include "airflow.labels.chart" . }} 24 | release: {{ .Release.Name }} 25 | heritage: {{ .Release.Service }} 26 | {{- if .Values.workers.labels }} 27 | {{- toYaml .Values.workers.labels | nindent 4 }} 28 | {{- end }} 29 | spec: 30 | serviceName: "{{ include "airflow.fullname" . }}-worker" 31 | replicas: {{ .Values.workers.replicas }} 32 | updateStrategy: 33 | type: RollingUpdate 34 | ## we do not need to guarantee the order in which workers are scaled 35 | podManagementPolicy: Parallel 36 | selector: 37 | matchLabels: 38 | app: {{ include "airflow.labels.app" . }} 39 | component: worker 40 | release: {{ .Release.Name }} 41 | template: 42 | metadata: 43 | annotations: 44 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 45 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 46 | {{- if .Values.airflow.podAnnotations }} 47 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 48 | {{- end }} 49 | {{- if .Values.workers.podAnnotations }} 50 | {{- toYaml .Values.workers.podAnnotations | nindent 8 }} 51 | {{- end }} 52 | {{- if .Values.workers.safeToEvict }} 53 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 54 | {{- end }} 55 | labels: 56 | app: {{ include "airflow.labels.app" . 
}} 57 | component: worker 58 | release: {{ .Release.Name }} 59 | {{- if .Values.workers.podLabels }} 60 | {{- toYaml .Values.workers.podLabels | nindent 8 }} 61 | {{- end }} 62 | spec: 63 | restartPolicy: Always 64 | {{- if .Values.airflow.image.pullSecret }} 65 | imagePullSecrets: 66 | - name: {{ .Values.airflow.image.pullSecret }} 67 | {{- end }} 68 | {{- if .Values.workers.celery.gracefullTermination }} 69 | terminationGracePeriodSeconds: {{ add .Values.workers.terminationPeriod .Values.workers.celery.gracefullTerminationPeriod }} 70 | {{- else }} 71 | terminationGracePeriodSeconds: {{ .Values.workers.terminationPeriod }} 72 | {{- end }} 73 | serviceAccountName: {{ include "airflow.serviceAccountName" . }} 74 | {{- if $podNodeSelector }} 75 | nodeSelector: 76 | {{- $podNodeSelector | nindent 8 }} 77 | {{- end }} 78 | {{- if $podAffinity }} 79 | affinity: 80 | {{- $podAffinity | nindent 8 }} 81 | {{- end }} 82 | {{- if $podTolerations }} 83 | tolerations: 84 | {{- $podTolerations | nindent 8 }} 85 | {{- end }} 86 | {{- if $podSecurityContext }} 87 | securityContext: 88 | {{- $podSecurityContext | nindent 8 }} 89 | {{- end }} 90 | initContainers: 91 | {{- if $extraPipPackages }} 92 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 93 | {{- end }} 94 | {{- if .Values.dags.gitSync.enabled }} 95 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 96 | {{- end }} 97 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 98 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 99 | containers: 100 | - name: airflow-worker 101 | {{- include "airflow.image" . 
| indent 10 }} 102 | resources: 103 | {{- toYaml .Values.workers.resources | nindent 12 }} 104 | envFrom: 105 | {{- include "airflow.envFrom" . | indent 12 }} 106 | env: 107 | {{- include "airflow.env" . | indent 12 }} 108 | {{- if .Values.workers.celery.gracefullTermination }} 109 | lifecycle: 110 | preStop: 111 | exec: 112 | command: 113 | - "timeout" 114 | - "{{ .Values.workers.celery.gracefullTerminationPeriod }}s" 115 | - "python" 116 | - "-Wignore" 117 | - "-c" 118 | - | 119 | import os 120 | import time 121 | import subprocess 122 | from celery import Celery 123 | from celery.app.control import Inspect 124 | from typing import List 125 | 126 | def run_command(cmd: List[str]) -> str: 127 | process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 128 | output, error = process.communicate() 129 | if process.returncode != 0:  # the exit status, not the presence of stderr, signals failure 130 | raise Exception(error.decode(encoding="utf-8")) 131 | else: 132 | return output.decode(encoding="utf-8").strip()  # drop any trailing newline emitted by the command 133 | 134 | broker_url = run_command(["bash", "-c", "eval $AIRFLOW__CELERY__BROKER_URL_CMD"]) 135 | local_celery_host = f"celery@{os.environ['HOSTNAME']}" 136 | app = Celery(broker=broker_url) 137 | 138 | # prevent the worker accepting new tasks 139 | print(f"canceling celery consumer for {local_celery_host}...") 140 | app.control.cancel_consumer("default", destination=[local_celery_host]) 141 | 142 | # wait until the worker finishes its current tasks (active() is None when the worker does not reply) 143 | i = Inspect(app=app, destination=[local_celery_host]) 144 | active_tasks = (i.active() or {}).get(local_celery_host, []) 145 | while len(active_tasks) > 0: 146 | print(f"waiting [10 sec] for remaining tasks to finish: {[task.get('name') for task in active_tasks]}") 147 | time.sleep(10) 148 | active_tasks = (i.active() or {}).get(local_celery_host, []) 149 | {{- end }} 150 | ports: 151 | - name: wlog 152 | containerPort: 8793 153 | protocol: TCP 154 | command: 155 | {{- include "airflow.command" . 
| indent 12 }} 156 | ## required because `/entrypoint` only passes "bash" / "python", and our `lifecycle.preStop` uses "timeout" 157 | - "bash" 158 | - "-c" 159 | args: 160 | {{- if .Values.airflow.legacyCommands }} 161 | - "exec airflow worker" 162 | {{- else }} 163 | - "exec airflow celery worker" 164 | {{- end }} 165 | {{- if $volumeMounts }} 166 | volumeMounts: 167 | {{- $volumeMounts | indent 12 }} 168 | {{- end }} 169 | {{- if .Values.dags.gitSync.enabled }} 170 | {{- include "airflow.container.git_sync" . | indent 8 }} 171 | {{- end }} 172 | {{- if .Values.airflow.extraContainers }} 173 | {{- toYaml .Values.airflow.extraContainers | nindent 8 }} 174 | {{- end }} 175 | {{- if $volumes }} 176 | volumes: 177 | {{- $volumes | indent 8 }} 178 | {{- end }} 179 | {{- end }} 180 | -------------------------------------------------------------------------------- /k8s/charts/airflow/templates/scheduler/scheduler-deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- $podNodeSelector := include "airflow.podNodeSelector" (dict "Release" .Release "Values" .Values "nodeSelector" .Values.scheduler.nodeSelector) }} 2 | {{- $podAffinity := include "airflow.podAffinity" (dict "Release" .Release "Values" .Values "affinity" .Values.scheduler.affinity) }} 3 | {{- $podTolerations := include "airflow.podTolerations" (dict "Release" .Release "Values" .Values "tolerations" .Values.scheduler.tolerations) }} 4 | {{- $podSecurityContext := include "airflow.podSecurityContext" (dict "Release" .Release "Values" .Values "securityContext" .Values.scheduler.securityContext) }} 5 | {{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.scheduler.extraPipPackages }} 6 | {{- $extraVolumeMounts := .Values.scheduler.extraVolumeMounts }} 7 | {{- $volumeMounts := include "airflow.volumeMounts" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} 8 | {{- 
$extraVolumes := .Values.scheduler.extraVolumes }} 9 | {{- $volumes := include "airflow.volumes" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | metadata: 13 | name: {{ include "airflow.fullname" . }}-scheduler 14 | {{- if .Values.scheduler.annotations }} 15 | annotations: 16 | {{- toYaml .Values.scheduler.annotations | nindent 4 }} 17 | {{- end }} 18 | labels: 19 | app: {{ include "airflow.labels.app" . }} 20 | component: scheduler 21 | chart: {{ include "airflow.labels.chart" . }} 22 | release: {{ .Release.Name }} 23 | heritage: {{ .Release.Service }} 24 | {{- if .Values.scheduler.labels }} 25 | {{- toYaml .Values.scheduler.labels | nindent 4 }} 26 | {{- end }} 27 | spec: 28 | replicas: {{ .Values.scheduler.replicas }} 29 | strategy: 30 | type: RollingUpdate 31 | rollingUpdate: 32 | {{- if .Values.airflow.legacyCommands }} 33 | ## only one scheduler can run concurrently (Airflow 1.10) 34 | maxSurge: 0 35 | maxUnavailable: 100% 36 | {{- else }} 37 | ## multiple schedulers can run concurrently (Airflow 2.0) 38 | maxSurge: 25% 39 | maxUnavailable: 0 40 | {{- end }} 41 | selector: 42 | matchLabels: 43 | app: {{ include "airflow.labels.app" . }} 44 | component: scheduler 45 | release: {{ .Release.Name }} 46 | template: 47 | metadata: 48 | annotations: 49 | checksum/secret-config-envs: {{ include (print $.Template.BasePath "/config/secret-config-envs.yaml") . | sha256sum }} 50 | checksum/secret-local-settings: {{ include (print $.Template.BasePath "/config/secret-local-settings.yaml") . | sha256sum }} 51 | {{- if include "airflow.executor.kubernetes_like" . }} 52 | checksum/config-pod-template: {{ include (print $.Template.BasePath "/config/configmap-pod-template.yaml") . 
| sha256sum }} 53 | {{- end }} 54 | {{- if .Values.airflow.podAnnotations }} 55 | {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} 56 | {{- end }} 57 | {{- if .Values.scheduler.podAnnotations }} 58 | {{- toYaml .Values.scheduler.podAnnotations | nindent 8 }} 59 | {{- end }} 60 | {{- if .Values.scheduler.safeToEvict }} 61 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 62 | {{- end }} 63 | labels: 64 | app: {{ include "airflow.labels.app" . }} 65 | component: scheduler 66 | release: {{ .Release.Name }} 67 | {{- if .Values.scheduler.podLabels }} 68 | {{- toYaml .Values.scheduler.podLabels | nindent 8 }} 69 | {{- end }} 70 | spec: 71 | restartPolicy: Always 72 | {{- if .Values.airflow.image.pullSecret }} 73 | imagePullSecrets: 74 | - name: {{ .Values.airflow.image.pullSecret }} 75 | {{- end }} 76 | {{- if $podNodeSelector }} 77 | nodeSelector: 78 | {{- $podNodeSelector | nindent 8 }} 79 | {{- end }} 80 | {{- if $podAffinity }} 81 | affinity: 82 | {{- $podAffinity | nindent 8 }} 83 | {{- end }} 84 | {{- if $podTolerations }} 85 | tolerations: 86 | {{- $podTolerations | nindent 8 }} 87 | {{- end }} 88 | {{- if $podSecurityContext }} 89 | securityContext: 90 | {{- $podSecurityContext | nindent 8 }} 91 | {{- end }} 92 | serviceAccountName: {{ include "airflow.serviceAccountName" . 
}} 93 | initContainers: 94 | {{- if $extraPipPackages }} 95 | {{- include "airflow.init_container.install_pip_packages" (dict "Release" .Release "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} 96 | {{- end }} 97 | {{- if .Values.dags.gitSync.enabled }} 98 | {{- include "airflow.container.git_sync" (dict "Release" .Release "Values" .Values "sync_one_time" "true") | indent 8 }} 99 | {{- end }} 100 | {{- include "airflow.init_container.check_db" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 101 | {{- include "airflow.init_container.wait_for_db_migrations" (dict "Release" .Release "Values" .Values "volumeMounts" $volumeMounts) | indent 8 }} 102 | {{- if .Values.scheduler.extraInitContainers }} 103 | {{- toYaml .Values.scheduler.extraInitContainers | nindent 8 }} 104 | {{- end }} 105 | containers: 106 | - name: airflow-scheduler 107 | {{- include "airflow.image" . | indent 10 }} 108 | resources: 109 | {{- toYaml .Values.scheduler.resources | nindent 12 }} 110 | envFrom: 111 | {{- include "airflow.envFrom" . | indent 12 }} 112 | env: 113 | {{- include "airflow.env" . | indent 12 }} 114 | command: 115 | {{- include "airflow.command" . 
| indent 12 }} 116 | args: 117 | - "bash" 118 | - "-c" 119 | - "exec airflow scheduler -n {{ .Values.scheduler.numRuns }}" 120 | {{- if .Values.scheduler.livenessProbe.enabled }} 121 | livenessProbe: 122 | initialDelaySeconds: {{ .Values.scheduler.livenessProbe.initialDelaySeconds }} 123 | periodSeconds: {{ .Values.scheduler.livenessProbe.periodSeconds }} 124 | failureThreshold: {{ .Values.scheduler.livenessProbe.failureThreshold }} 125 | timeoutSeconds: {{ .Values.scheduler.livenessProbe.timeoutSeconds }} 126 | exec: 127 | command: 128 | - "python" 129 | - "-Wignore" 130 | - "-c" 131 | - | 132 | import sys 133 | from typing import List 134 | from airflow.jobs.scheduler_job import SchedulerJob 135 | from airflow.utils.db import create_session 136 | from airflow.utils.net import get_hostname 137 | from airflow.utils.state import State 138 | 139 | with create_session() as session: 140 | hostname = get_hostname() 141 | query = session \ 142 | .query(SchedulerJob) \ 143 | .filter_by(state=State.RUNNING, hostname=hostname) \ 144 | .order_by(SchedulerJob.latest_heartbeat.desc()) 145 | jobs: List[SchedulerJob] = query.all() 146 | alive_jobs = [job for job in jobs if job.is_alive()] 147 | count_alive_jobs = len(alive_jobs) 148 | 149 | if count_alive_jobs == 1: 150 | # scheduler is healthy - we expect one SchedulerJob per scheduler 151 | pass 152 | elif count_alive_jobs == 0: 153 | sys.exit(f"UNHEALTHY - 0 alive SchedulerJob for: {hostname}") 154 | else: 155 | sys.exit(f"UNHEALTHY - {count_alive_jobs} (more than 1) alive SchedulerJob for: {hostname}") 156 | {{- end }} 157 | {{- if or ($volumeMounts) (include "airflow.executor.kubernetes_like" .) }} 158 | volumeMounts: 159 | {{- $volumeMounts | indent 12 }} 160 | {{- if include "airflow.executor.kubernetes_like" . 
}} 161 | - name: pod-template 162 | mountPath: /opt/airflow/pod_templates/pod_template.yaml 163 | subPath: pod_template.yaml 164 | readOnly: true 165 | {{- end }} 166 | {{- end }} 167 | {{- if .Values.dags.gitSync.enabled }} 168 | {{- include "airflow.container.git_sync" . | indent 8 }} 169 | {{- end }} 170 | {{- if .Values.airflow.extraContainers }} 171 | {{- toYaml .Values.airflow.extraContainers | nindent 8 }} 172 | {{- end }} 173 | {{- if or ($volumes) (include "airflow.executor.kubernetes_like" .) }} 174 | volumes: 175 | {{- $volumes | indent 8 }} 176 | {{- if include "airflow.executor.kubernetes_like" . }} 177 | - name: pod-template 178 | configMap: 179 | name: {{ include "airflow.fullname" . }}-pod-template 180 | {{- end }} 181 | {{- end }} --------------------------------------------------------------------------------