├── .gitignore ├── images ├── git.png ├── airflow.png ├── git_sync.jpg └── architecture.jpg ├── .github └── workflows │ └── release.yaml ├── examples └── dags │ ├── sample_bash_operator_dag.py │ └── sample_docker_operator_dag.py ├── LICENSE ├── CONTRIBUTING.md ├── CHANGELOG.md ├── README.md └── docker-compose.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | dags/ 2 | plugins/ 3 | logs/ 4 | config/ 5 | project_data/ 6 | 7 | -------------------------------------------------------------------------------- /images/git.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/git.png -------------------------------------------------------------------------------- /images/airflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/airflow.png -------------------------------------------------------------------------------- /images/git_sync.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/git_sync.jpg -------------------------------------------------------------------------------- /images/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/architecture.jpg -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | token: ${{ secrets.GITHUB_TOKEN }} 15 | 16 | 
- name: DataBurst Python Semantic Versioning 17 | uses: mostafaghadimi/python-semantic-versioning@main 18 | with: 19 | gh_token: ${{ secrets.GITHUB_TOKEN }} 20 | debug: true 21 | -------------------------------------------------------------------------------- /examples/dags/sample_bash_operator_dag.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | from airflow import DAG 4 | from airflow.providers.standard.operators.bash import BashOperator 5 | 6 | default_args = { 7 | "owner": "Mostafa Ghadimi", 8 | "description": "Use of the BashOperator", 9 | "depends_on_past": False, 10 | "start_date": datetime(2023, 4, 4), 11 | "email_on_failure": False, 12 | "email_on_retry": False, 13 | "retries": 1, 14 | "retry_delay": timedelta(minutes=5), 15 | } 16 | 17 | with DAG( 18 | "bash_dag_sample", default_args=default_args, schedule="5 10 * * *", catchup=False 19 | ) as dag: 20 | t1 = BashOperator(task_id="print_hello", bash_command='echo "hello world"') 21 | 22 | t1 23 | -------------------------------------------------------------------------------- /examples/dags/sample_docker_operator_dag.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | from airflow import DAG 4 | from airflow.providers.docker.operators.docker import DockerOperator 5 | 6 | default_args = { 7 | "owner": "Mostafa Ghadimi", 8 | "description": "Use of the DockerOperator", 9 | "depends_on_past": False, 10 | "start_date": datetime(2023, 4, 23), 11 | "email_on_failure": False, 12 | "email_on_retry": False, 13 | "retries": 1, 14 | "retry_delay": timedelta(minutes=5), 15 | } 16 | 17 | with DAG( 18 | "docker_dag_v2", 19 | default_args=default_args, 20 | schedule="5 10 * * *", 21 | catchup=False, 22 | ) as dag: 23 | t1 = DockerOperator( 24 | task_id="docker_command", 25 | image="alpine:latest", 26 | api_version="auto", 27 | auto_remove="success", 28 
| command="/bin/sleep 30", 29 | docker_url="unix://var/run/docker.sock", 30 | network_mode="bridge", 31 | ) 32 | 33 | t1 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 DataBurst 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Codebase 🚀 2 | 3 | This document shares some best practices and tips for working on our code base. 
4 | 5 | ## Commit Message 📝 6 | Write a good commit message that follows this format: 7 | 8 | ``` 9 | [optional emoji] [type]([optional scope]): [subject] 10 | [BLANK LINE] 11 | [optional body] 12 | ``` 13 | Where: 14 | 15 | - `[optional emoji]` is an optional emoji that represents the type of change. You can use https://gitmoji.dev/ to find 16 | suitable emojis for your commits. For example, you can use :bug: for fix, :sparkles: for feat, :recycle: for refactor, 17 | etc. 18 | - `[type]` is one of these valid types: fix, feat, refactor, docs, test, chore. These types indicate what kind of change 19 | you made in your code. For example, fix means you fixed a bug, feat means you added a new feature, docs means you 20 | updated the documentation, etc. 21 | - `([optional scope])` is an optional scope that specifies what part of the code is affected by your change. For 22 | example, you can use (etl), (api), (db) to indicate which module or component you worked on. 23 | - `[subject]` is the message itself that briefly describes what changed and why. It should be concise and clear, 24 | preferably less than 50 characters. It should NOT start with a capital letter and should NOT end with a period. It 25 | should not explain how you changed the code, but what and why. 26 | - `[BLANK LINE]` is an empty line that separates the subject from the body. 27 | - `[optional body]` is an optional body that provides more details about your change. It should be written in 28 | paragraphs, each separated by a blank line. It should explain the problem you solved, how you solved it, and any other relevant information. It should wrap at 72 characters. 29 | 30 | For example: 31 | 32 | ``` 33 | :sparkles: feat(etl): add new data source for customer segmentation 34 | ``` 35 | 36 | If you have multiple commits in one merge request, you should squash them into a single commit with a suitable commit 37 | message when merging with the main branch. 
The commit message should summarize what changes you made in the merge 38 | request and why. It should also reference the issue that you resolved and include the ticket number from Jira. For 39 | example: 40 | 41 | ``` 42 | :recycle: refactor(ansible): move dev machines group vars to the role vars #123 43 | ``` 44 | 45 | This indicates that you resolved ticket number 123, which was about refactoring the variables of development machines. 46 | 47 | 48 | ## Branching Name Convention :twisted_rightwards_arrows: 49 | 50 | Before creating a feature or resolving a bug, it is required to create an issue on GitHub. This issue will be assigned a number, which should be included in the branch name, separated by hyphens (-). Here is an example of a valid Git branch name: 51 | ```bash 52 | 2-feature-add-contribution-documentation 53 | ``` 54 | 55 | ## Feature Development 🛠️ 56 | 57 | To write a new feature, follow these steps: 58 | 59 | 1. Create an issue in Github and explain what should be done or resolved. 60 | 1. Create a pull request and a corresponding branch for the issue. Assign the task and review to appropriate people. 61 | 1. Start committing in the specified branch. You can have separate changes in different commits in this branch. At the 62 | time of merging with the main branch, you should squash all of these commits into a single commit with an appropriate 63 | commit message for the whole merge request. 64 | 1. After the commits, check for the running of CI which runs a lint and for some codes runs the appropriate tests. If 65 | something fails, fix them. 
66 | 67 | 68 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | 4 | ## v1.0.0 (2025-08-15) 5 | 6 | ### Features 7 | 8 | - Migrate docker compose and examples to the airflow 3 (#31) 9 | ([#31](https://github.com/data-burst/airflow-git-sync/pull/31), 10 | [`5155aec`](https://github.com/data-burst/airflow-git-sync/commit/5155aecf2d11ba1d5a2aba574a79b6a8a1bbe550)) 11 | 12 | 13 | ## v0.1.0 (2025-08-15) 14 | 15 | ### Bug Fixes 16 | 17 | - Resolve the default repository address in the docker-compose file (#17) 18 | ([#17](https://github.com/data-burst/airflow-git-sync/pull/17), 19 | [`3078463`](https://github.com/data-burst/airflow-git-sync/commit/3078463686c472f11c6ee3e3fb46318921ff1af6)) 20 | 21 | - Resolve the git-sync image in the documentation and reformat the docker-compose file to yaml (#16) 22 | ([#16](https://github.com/data-burst/airflow-git-sync/pull/16), 23 | [`a75681c`](https://github.com/data-burst/airflow-git-sync/commit/a75681cf60bd5822cf5560bf292211f6382720f5)) 24 | 25 | - **entrypoint**: Resolve git clean issue and use global variable for the project directory (#1) 26 | ([#1](https://github.com/data-burst/airflow-git-sync/pull/1), 27 | [`3340a7e`](https://github.com/data-burst/airflow-git-sync/commit/3340a7eec83b288477138068bfad954551bfff22)) 28 | 29 | - **github-action**: Add the working CI for pushing the latest tag to the Dockerhub 30 | ([`4a0ed1e`](https://github.com/data-burst/airflow-git-sync/commit/4a0ed1ed04d42b8a623e6ec249078723fa911f3c)) 31 | 32 | ### Chores 33 | 34 | - Add .gitignore file to the repository 35 | ([`3ca7b66`](https://github.com/data-burst/airflow-git-sync/commit/3ca7b66085ced31ccfa025b8374361199fbbd764)) 36 | 37 | - **docker**: Change the maintainer email in Dockerfile of Git sync 38 | 
([`4ecbf9b`](https://github.com/data-burst/airflow-git-sync/commit/4ecbf9bb4220d5ac56762ec1f9f20f9e773b31d4)) 39 | 40 | - **license**: Add MIT license file 41 | ([`00c6d58`](https://github.com/data-burst/airflow-git-sync/commit/00c6d58684652b7b86f323784aac8e54250368ea)) 42 | 43 | ### Documentation 44 | 45 | - Add FAQ collapsible section to README (#21) 46 | ([#21](https://github.com/data-burst/airflow-git-sync/pull/21), 47 | [`0ed1461`](https://github.com/data-burst/airflow-git-sync/commit/0ed1461e22be1a8a82dfdab729fa32efb6dc5d49)) 48 | 49 | - Add GIT_PULL_REBASE parameter explanation to the documentation and set the required/optional 50 | fields (#24) ([#24](https://github.com/data-burst/airflow-git-sync/pull/24), 51 | [`146b055`](https://github.com/data-burst/airflow-git-sync/commit/146b05599861f2a3e04d346afb1e66d5adda748c)) 52 | 53 | - Add instruction for setting right Airflow user and create directories (#2) (#4) 54 | ([#4](https://github.com/data-burst/airflow-git-sync/pull/4), 55 | [`2e690b5`](https://github.com/data-burst/airflow-git-sync/commit/2e690b5494531a076147337318e9eef18006427c)) 56 | 57 | - **contribution**: Add contribution guide 58 | ([`038d5f1`](https://github.com/data-burst/airflow-git-sync/commit/038d5f1332452e443e6e0c2ac0908bec106d4ebb)) 59 | 60 | - **emoji**: Resolve the space between character and emoji 61 | ([`2b4bc8b`](https://github.com/data-burst/airflow-git-sync/commit/2b4bc8b2789cc3898684ec52392f78b31b32447b)) 62 | 63 | - **git-sync**: Add Kubernetes Git Sync sidecar container link to its repository in the 64 | documentation 65 | ([`859d3db`](https://github.com/data-burst/airflow-git-sync/commit/859d3dbf7b93e0303badc30a30a54f63f9d361fd)) 66 | 67 | - **links**: Fix contribution link and license link 68 | ([`42122ac`](https://github.com/data-burst/airflow-git-sync/commit/42122ac734baabe8e0fe1e2d726f4d365b52ec2c)) 69 | 70 | - **readme**: Add a part in project usage about how to add ssh key and handle it on github (#3) (#8) 71 | 
([#8](https://github.com/data-burst/airflow-git-sync/pull/8), 72 | [`99a933c`](https://github.com/data-burst/airflow-git-sync/commit/99a933c16b9fab966cf55cfbdeebda748871e944)) 73 | 74 | - **readme-project-usage**: Fix project usage part by removing extra lines 75 | ([`cc553f6`](https://github.com/data-burst/airflow-git-sync/commit/cc553f6c44c144e6f4e8aed25c149de8214ccfa8)) 76 | 77 | - **README.md**: Improve documentation to make its usage more sensible (#9) 78 | ([#9](https://github.com/data-burst/airflow-git-sync/pull/9), 79 | [`4ec528e`](https://github.com/data-burst/airflow-git-sync/commit/4ec528e41638fc4625020feab2812594b8d05938)) 80 | 81 | ### Features 82 | 83 | - Add semantic versioning workflow (#30) 84 | ([#30](https://github.com/data-burst/airflow-git-sync/pull/30), 85 | [`156326a`](https://github.com/data-burst/airflow-git-sync/commit/156326a21d31bfd8196382021e6858f93a0c8c06)) 86 | 87 | - Remove unused GIT_URL variable from Docker Compose file and README (#7) 88 | ([#7](https://github.com/data-burst/airflow-git-sync/pull/7), 89 | [`4ed3a18`](https://github.com/data-burst/airflow-git-sync/commit/4ed3a181b25de86007561a25e2d4172567b91528)) 90 | 91 | - **compose**: Add docker-compose file in order to setup Airflow and Git-sync 92 | ([`ba357d6`](https://github.com/data-burst/airflow-git-sync/commit/ba357d66d0544c66dce3769640878ecda68c9502)) 93 | 94 | - **compose**: Add image instead of context in order to prevent from building the image from scratch 95 | ([`5959b90`](https://github.com/data-burst/airflow-git-sync/commit/5959b9053681c4a6dbaf256d948f8d4a350bcbfe)) 96 | 97 | - **docker**: Add Dockerfile for Git-Sync image 98 | ([`d103906`](https://github.com/data-burst/airflow-git-sync/commit/d103906e603e8679ea06ef3303b3121d8fedea23)) 99 | 100 | - **document**: Add README.md file and images 101 | ([`285781f`](https://github.com/data-burst/airflow-git-sync/commit/285781f43e37782d7afe573943a327ebc9ee60ef)) 102 | 103 | - **examples**: Add example DAGs directory for 
BashOperator and DockerOperator 104 | ([`5433e78`](https://github.com/data-burst/airflow-git-sync/commit/5433e78d8e62f48d5819dadaf598fb2b9e137bbd)) 105 | 106 | - **github-actions**: Add CI for building and pushing Docker image to Dockerhub 107 | ([`d54827d`](https://github.com/data-burst/airflow-git-sync/commit/d54827d1a13802bd9c9278d5a9e71345eb250a45)) 108 | 109 | - **script**: Add entrypoint script for building Docker image 110 | ([`18d9733`](https://github.com/data-burst/airflow-git-sync/commit/18d973340dd704c4b4ddd67a9b80cd75fe723895)) 111 | 112 | ### Refactoring 113 | 114 | - Change the default value of REPO_URL in docker-compose file and change it in README file (#13) 115 | ([#13](https://github.com/data-burst/airflow-git-sync/pull/13), 116 | [`891e84b`](https://github.com/data-burst/airflow-git-sync/commit/891e84be0b6d76dfab44d987b3dd9d8001f0ca0f)) 117 | 118 | - Separate git-sync and airflow-git-sync repositories (#19) 119 | ([#19](https://github.com/data-burst/airflow-git-sync/pull/19), 120 | [`45c55ec`](https://github.com/data-burst/airflow-git-sync/commit/45c55ec348e9af37ac0bc9332a7a0d2cc92c3788)) 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Airflow Git Sync 2 | 3 | 🎉 What if you could instantly sync DAG changes from **Git** to **Airflow**? Well now you can! 4 | 5 |

6 | airflow git sync 7 |

8 | 9 |

10 | 11 | contributors 12 | 13 | 14 | license 15 | 16 |

17 | 18 | ## Table of Contents 🏗️ 19 | 20 | - [Airflow Git Sync](#airflow-git-sync) 21 | - [Table of Contents 🏗️](#table-of-contents-️) 22 | - [Project Description 🌱](#project-description-) 23 | - [Project Usage 🧑‍💻](#project-usage-) 24 | - [Setting Up Your SSH Key](#setting-up-your-ssh-key) 25 | - [Using Airflow Git Sync](#using-airflow-git-sync) 26 | - [FAQ ❓](#faq-) 27 | - [Contributing 👥](#contributing-) 28 | - [License 📄](#license-) 29 | 30 | ## Project Description 🌱 31 | 32 | Airflow Git Sync provides automated DAG deployments from Git for Airflow environments. It syncs your pipeline code from a Git repository into the Airflow DAG folder. 33 | 34 | Keeping DAGs directly in Airflow servers makes management challenging. Code changes require manual syncing to containers. There is no version control or history. 35 | If you have ever worked with Airflow on Kubernetes, it gives you the ability to sync the DAGs with your repository (as an GitOps solution) using [git-sync sidecar contanier](https://github.com/kubernetes/git-sync). If you don't have Kubernetes, it is hard to keep the DAGs directory of Airflow (which is placed at `/opt/airflow/dags/`) synced with the changes you applied to your DAGs and in some cases it is required to restart the Airflow service or container. 36 | 37 | The project introduces `git-sync` application alongside Airflow. This handles cloning your configured DAG Git repository and syncing contents over to Airflow's DAG directory. 38 | 39 | The syncing is achieved via a lightweight Docker container that runs periodically using inotify wait to detect file changes. The container can be deployed using docker-compose alongside Airflow. 
Here is a bit of the docker-compose file: 40 | 41 | ```bash 42 | airflow-webserver: 43 | # Airflow container 44 | 45 | airflow-scheduler: 46 | # Airflow container 47 | 48 | git-sync: 49 | # Git-sync container 50 | image: databurst/git-sync:latest 51 | environment: 52 | REPO_URL: .git 53 | #...other config 54 | ``` 55 | 56 | The git-sync container will keep DAGs in Airflow containers continually synced from files committed to the Git repository. 57 | 58 | The end result is Airflow DAGs can be managed via Git best practices. Changes are automatically reflected in your pipeline deployment. No need for complex Kubernetes just to get basic Git sync! 59 | 60 |

61 | airflow git sync diagram 62 |

63 | 64 | ## Project Usage 🧑‍💻 65 | 66 | **Important Tip**: 67 | 68 | Before you can use the project, based on [Airflow's documentation](https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user), you need to ensure that Airflow has the correct permissions for the required directories. To do this, execute the following commands in the directory where your `docker-compose.yaml` file is located: 69 | 70 | ```bash 71 | mkdir -p ./dags ./logs ./plugins ./config 72 | echo -e "AIRFLOW_UID=$(id -u)" > .env 73 | ``` 74 | 75 | ### Setting Up Your SSH Key 76 | 77 | 1. **Generate an SSH Key**: If you don't already have an SSH key, you can generate one using the following steps: 78 | 79 | - Open a Terminal: Open your terminal or command prompt. 80 | 81 | - Generate SSH Key: Run the following command to generate a new SSH key: 82 | 83 | ```bash 84 | ssh-keygen -t <key_type> 85 | # example 86 | # ssh-keygen -t ed25519 87 | ``` 88 | 89 | Replace `<key_type>` with the desired key type (e.g., `ed25519`, `rsa`). 90 | 91 | - Follow Prompts: You'll be prompted to choose a location for your SSH key. Press Enter to accept the default location (usually `~/.ssh/id_<key_type>`) or specify a different one. 92 | 93 | 2. **Adding SSH Key to Your Git Account**: To use your SSH key with Git, you need to add your public key to your Git account. Here's how: 94 | 95 | - Go to your Git account settings on the web (e.g., GitHub, GitLab). 96 | 97 | - Navigate to "SSH and GPG keys" or a similar section. 98 | 99 | - Click "New SSH key" or equivalent. 100 | 101 | - Paste your public key into the provided field and give it a meaningful title. 102 | 103 | 3. 
**Updating Docker Compose**: To ensure that your SSH key is correctly mounted in the `git-sync` container, modify the relevant line in your `docker-compose.yaml` file as follows: 104 | 105 | ```yaml 106 | - ${GIT_SSH_KEY:-~/.ssh/}:/root/.ssh/ 107 | ``` 108 | ### Using Airflow Git Sync 109 | 110 | Using Airflow Git Sync is simple: 111 | 112 | 1. Clone the repository. 113 | 2. Configure git-sync via environment variables in `docker-compose.yaml` file: 114 | 115 | | Variable | Description | Default Value | 116 | | --- | --- | --- | 117 | | `REPO_URL` | The URL of the Git repository to sync | `git@github.com:data-burst/airflow-git-sync.git` (required) | 118 | | `GIT_URL` | The URL of the Git remote (needed to add host fingerprint to known_hosts) | `github.com` (required) | 119 | | `SUBFOLDER_PATH` | The repository sub-folder to sync. Leaving empty copies the entire repo | `N/A` (optional) | 120 | | `GIT_BRANCH` | The Git branch to sync | `main` (optional) | 121 | | `DIRECTORY_NAME` | The name of the directory to clone the repository into | `project` (optional) | 122 | | `DESTINATION_PATH` | The path to sync the repository to | `/app/sync` (optional) | 123 | | `INTERVAL` | The interval (in seconds) to sync the repository | `10` (optional)| 124 | | `GIT_PULL_REBASE` | Determines the Git pull strategy. If set to `true`, it configures `git config pull.rebase` to use `rebase` during pulls. If `false`, it defaults to `merge`. | `false` (optional)| 125 | 126 | 3. In order to deploy the Airflow with the configured Git-sync, simply run the `docker compose up -d` command. 127 | 4. Enjoy! 128 | 129 | ## FAQ ❓ 130 | 131 | In this section you can find solutions to common issues you may face. 132 | 133 | 
134 | 135 | Internet Connection Issue 136 | 137 | 138 | If you see the following error when running the `docker logs -f <container_name>` command, the probable root cause is that you are connected to a VPN. 139 | 140 | > getaddrinfo github.com: Try again 141 | ssh: Could not resolve hostname github.com: Try again 142 | fatal: Could not read from remote repository. 143 | 144 | For more information, check out [this](https://stackoverflow.com/questions/20430371/my-docker-container-has-no-internet) link. 145 |
146 | 147 | ## Contributing 👥 148 | 149 | We welcome contributions to this repository! If you’re interested in contributing, please take a look at our [CONTRIBUTING.md](CONTRIBUTING.md) file for more information on how to get started. We look forward to collaborating with you! 150 | 151 | ## License 📄 152 | 153 | This repository is licensed under the MIT License, which is a permissive open-source license that allows for reuse and modification of the code with few restrictions. You can find the full text of the license in [this](LICENSE) file. 154 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | 19 | # Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. 20 | # 21 | # WARNING: This configuration is for local development. Do not use it in a production deployment. 
22 | # 23 | # This configuration supports basic configuration using environment variables or an .env file 24 | # The following variables are supported: 25 | # 26 | # AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. 27 | # Default: apache/airflow:3.0.4 28 | # AIRFLOW_UID - User ID in Airflow containers 29 | # Default: 50000 30 | # AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. 31 | # Default: . 32 | # Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode 33 | # 34 | # _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). 35 | # Default: airflow 36 | # _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). 37 | # Default: airflow 38 | # _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. 39 | # Use this option ONLY for quick checks. Installing requirements at container 40 | # startup is done EVERY TIME the service is started. 41 | # A better way is to build a custom image or extend the official image 42 | # as described in https://airflow.apache.org/docs/docker-stack/build.html. 43 | # Default: '' 44 | # 45 | # Feel free to modify this file to suit your needs. 46 | --- 47 | x-airflow-common: 48 | &airflow-common 49 | # In order to add custom dependencies or upgrade provider distributions you can use your extended image. 50 | # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml 51 | # and uncomment the "build" line below, Then run `docker-compose build` to build the images. 52 | image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:3.0.4} 53 | # build: . 
54 | environment: 55 | &airflow-common-env 56 | AIRFLOW__CORE__EXECUTOR: CeleryExecutor 57 | AIRFLOW__CORE__AUTH_MANAGER: airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager 58 | AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 59 | AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow 60 | AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 61 | AIRFLOW__CORE__FERNET_KEY: '' 62 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' 63 | AIRFLOW__CORE__LOAD_EXAMPLES: 'true' 64 | AIRFLOW__CORE__EXECUTION_API_SERVER_URL: 'http://airflow-apiserver:8080/execution/' 65 | # yamllint disable rule:line-length 66 | # Use simple http server on scheduler for health checks 67 | # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server 68 | # yamllint enable rule:line-length 69 | AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' 70 | # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks 71 | # for other purpose (development, test and especially production usage) build/extend Airflow image. 
72 | _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} 73 | # The following line can be used to set a custom config file, stored in the local config folder 74 | AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg' 75 | volumes: 76 | - ./project_data:/opt/airflow/dags 77 | - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs 78 | - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config 79 | - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins 80 | user: "${AIRFLOW_UID:-50000}:0" 81 | depends_on: 82 | &airflow-common-depends-on 83 | redis: 84 | condition: service_healthy 85 | postgres: 86 | condition: service_healthy 87 | 88 | services: 89 | postgres: 90 | image: postgres:13 91 | environment: 92 | POSTGRES_USER: airflow 93 | POSTGRES_PASSWORD: airflow 94 | POSTGRES_DB: airflow 95 | volumes: 96 | - postgres-db-volume:/var/lib/postgresql/data 97 | healthcheck: 98 | test: ["CMD", "pg_isready", "-U", "airflow"] 99 | interval: 10s 100 | retries: 5 101 | start_period: 5s 102 | restart: always 103 | 104 | redis: 105 | # Redis is limited to 7.2-bookworm due to licencing change 106 | # https://redis.io/blog/redis-adopts-dual-source-available-licensing/ 107 | image: redis:7.2-bookworm 108 | expose: 109 | - 6379 110 | healthcheck: 111 | test: ["CMD", "redis-cli", "ping"] 112 | interval: 10s 113 | timeout: 30s 114 | retries: 50 115 | start_period: 30s 116 | restart: always 117 | 118 | airflow-apiserver: 119 | <<: *airflow-common 120 | command: api-server 121 | ports: 122 | - "8080:8080" 123 | healthcheck: 124 | test: ["CMD", "curl", "--fail", "http://localhost:8080/api/v2/version"] 125 | interval: 30s 126 | timeout: 10s 127 | retries: 5 128 | start_period: 30s 129 | restart: always 130 | depends_on: 131 | <<: *airflow-common-depends-on 132 | airflow-init: 133 | condition: service_completed_successfully 134 | 135 | airflow-scheduler: 136 | <<: *airflow-common 137 | command: scheduler 138 | healthcheck: 139 | test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] 140 | 
interval: 30s 141 | timeout: 10s 142 | retries: 5 143 | start_period: 30s 144 | restart: always 145 | depends_on: 146 | <<: *airflow-common-depends-on 147 | airflow-init: 148 | condition: service_completed_successfully 149 | 150 | airflow-dag-processor: 151 | <<: *airflow-common 152 | command: dag-processor 153 | healthcheck: 154 | test: ["CMD-SHELL", 'airflow jobs check --job-type DagProcessorJob --hostname "$${HOSTNAME}"'] 155 | interval: 30s 156 | timeout: 10s 157 | retries: 5 158 | start_period: 30s 159 | restart: always 160 | depends_on: 161 | <<: *airflow-common-depends-on 162 | airflow-init: 163 | condition: service_completed_successfully 164 | 165 | airflow-worker: 166 | <<: *airflow-common 167 | command: celery worker 168 | healthcheck: 169 | # yamllint disable rule:line-length 170 | test: 171 | - "CMD-SHELL" 172 | - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' 173 | interval: 30s 174 | timeout: 10s 175 | retries: 5 176 | start_period: 30s 177 | environment: 178 | <<: *airflow-common-env 179 | # Required to handle warm shutdown of the celery workers properly 180 | # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation 181 | DUMB_INIT_SETSID: "0" 182 | restart: always 183 | depends_on: 184 | <<: *airflow-common-depends-on 185 | airflow-apiserver: 186 | condition: service_healthy 187 | airflow-init: 188 | condition: service_completed_successfully 189 | 190 | airflow-triggerer: 191 | <<: *airflow-common 192 | command: triggerer 193 | healthcheck: 194 | test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] 195 | interval: 30s 196 | timeout: 10s 197 | retries: 5 198 | start_period: 30s 199 | restart: always 200 | depends_on: 201 | <<: *airflow-common-depends-on 202 | airflow-init: 203 | condition: service_completed_successfully 204 | 205 | 
# docker-compose.yaml (continued): bootstrap job, optional tooling, and the git-sync sidecar.

  # One-shot bootstrap job. The other Airflow services wait for it via
  # `condition: service_completed_successfully`. It runs as root (user "0:0")
  # with bash as the entrypoint and, in order:
  #   1. warns when AIRFLOW_UID is empty,
  #   2. warns when memory (< 4000 MB), CPUs (< 2) or free disk (< 10 GB) look
  #      insufficient (warnings only; the script keeps going),
  #   3. creates the logs/dags/plugins/config directories under /opt/airflow,
  #   4. prints the Airflow version and runs `airflow config list` through the
  #      image's /entrypoint,
  #   5. chowns /opt/airflow and the shared directories to "<AIRFLOW_UID>:0".
  #
  # Quoting inside the script: `${VAR}` is substituted by Compose on the host
  # before the container starts, whereas `$$` produces a literal `$` that bash
  # evaluates inside the container.
  # NOTE(review): because the chown lines use `${AIRFLOW_UID}` (Compose-side),
  # the in-script `export AIRFLOW_UID=$$(id -u)` fallback does not appear to
  # reach them; with AIRFLOW_UID unset they would run as `chown ":0"`. Confirm
  # this is intended before changing it.
  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    # yamllint disable rule:line-length
    command:
      - -c
      - |
        if [[ -z "${AIRFLOW_UID}" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
          echo "If you are on Linux, you SHOULD follow the instructions below to set "
          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
          echo "For other operating systems you can get rid of the warning with manually created .env file:"
          echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user"
          echo
          export AIRFLOW_UID=$$(id -u)
        fi
        one_meg=1048576
        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
        disk_available=$$(df / | tail -1 | awk '{print $$4}')
        warning_resources="false"
        if (( mem_available < 4000 )) ; then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
          echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
          echo
          warning_resources="true"
        fi
        if (( cpus_available < 2 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
          echo "At least 2 CPUs recommended. You have $${cpus_available}"
          echo
          warning_resources="true"
        fi
        if (( disk_available < one_meg * 10 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
          echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
          echo
          warning_resources="true"
        fi
        if [[ $${warning_resources} == "true" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
          echo "Please follow the instructions to increase amount of resources available:"
          echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin"
          echo
        fi
        echo
        echo "Creating missing opt dirs if missing:"
        echo
        mkdir -v -p /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Airflow version:"
        /entrypoint airflow version
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Running airflow config list to create default config file if missing."
        echo
        /entrypoint airflow config list >/dev/null
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Change ownership of files in /opt/airflow to ${AIRFLOW_UID}:0"
        echo
        chown -R "${AIRFLOW_UID}:0" /opt/airflow/
        echo
        echo "Change ownership of files in shared volumes to ${AIRFLOW_UID}:0"
        echo
        chown -v -R "${AIRFLOW_UID}:0" /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}

    # yamllint enable rule:line-length
    environment:
      <<: *airflow-common-env
      # Presumably consumed by the image's /entrypoint (DB migration and
      # initial user creation) -- see the Airflow docker-stack entrypoint docs.
      _AIRFLOW_DB_MIGRATE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      # Both fall back to "airflow" when unset; override them for anything
      # beyond local experimentation.
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
      _PIP_ADDITIONAL_REQUIREMENTS: ''
    user: "0:0"

  # Ad-hoc CLI container; only created when the "debug" profile is enabled.
  airflow-cli:
    <<: *airflow-common
    profiles:
      - debug
    environment:
      <<: *airflow-common-env
      CONNECTION_CHECK_MAX_COUNT: "0"
    # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
    command:
      - bash
      - -c
      - airflow
    depends_on:
      <<: *airflow-common-depends-on

  # You can enable flower by adding the "--profile flower" option, e.g. docker-compose --profile flower up,
  # or by explicitly targeting it on the command line, e.g. docker-compose up flower.
  # See: https://docs.docker.com/compose/profiles/
  flower:
    <<: *airflow-common
    command: celery flower
    profiles:
      - flower
    ports:
      - "5555:5555"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  # Sidecar that keeps ./project_data in sync with a Git repository.
  # All settings can be overridden from the host environment / .env file; the
  # values after ":-" are the defaults used when a variable is unset or empty.
  # The meaning of each variable is defined by the databurst/git-sync image.
  git-sync:
    image: databurst/git-sync:latest
    # Long-running sync loop: restart it on failure like the other daemons in
    # this file, so DAG syncing does not silently stop after a crash.
    restart: always
    volumes:
      # Host checkout directory, mounted at the image's destination path.
      - ./project_data:${DESTINATION_PATH:-/app/sync}
      # SSH private key used for the clone/pull.
      # NOTE(review): consider appending ":ro" if the image never needs to
      # modify the key file -- confirm against the image's entrypoint first.
      - ${GIT_SSH_KEY:-~/.ssh/id_ed25519}:/root/.ssh/id_ed25519
    environment:
      REPO_URL: ${REPO_URL:-git@github.com:data-burst/airflow-git-sync.git}
      GIT_BRANCH: ${GIT_BRANCH:-main}
      DIRECTORY_NAME: ${DIRECTORY_NAME:-project}
      DESTINATION_PATH: ${DESTINATION_PATH:-/app/sync}
      INTERVAL: ${INTERVAL:-10}
# Named volumes declared for this Compose project.
volumes:
  postgres-db-volume:

--------------------------------------------------------------------------------