├── .gitignore
├── images
├── git.png
├── airflow.png
├── git_sync.jpg
└── architecture.jpg
├── .github
└── workflows
│ └── release.yaml
├── examples
└── dags
│ ├── sample_bash_operator_dag.py
│ └── sample_docker_operator_dag.py
├── LICENSE
├── CONTRIBUTING.md
├── CHANGELOG.md
├── README.md
└── docker-compose.yaml
/.gitignore:
--------------------------------------------------------------------------------
1 | dags/
2 | plugins/
3 | logs/
4 | config/
5 | project_data/
6 |
7 |
--------------------------------------------------------------------------------
/images/git.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/git.png
--------------------------------------------------------------------------------
/images/airflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/airflow.png
--------------------------------------------------------------------------------
/images/git_sync.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/git_sync.jpg
--------------------------------------------------------------------------------
/images/architecture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-burst/airflow-git-sync/HEAD/images/architecture.jpg
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release
2 | 
3 | on:
4 | push:  # NOTE(review): no branch/tag filter — this runs on EVERY push to any ref; confirm that is intended for a release workflow
5 | 
6 | jobs:
7 | release:  # single job: compute and publish a semantic version for the repo
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout code
11 | uses: actions/checkout@v4
12 | with:
13 | fetch-depth: 0  # fetch full history (checkout defaults to a shallow clone of depth 1); presumably needed so the versioning action can read all tags/commits
14 | token: ${{ secrets.GITHUB_TOKEN }}
15 | 
16 | - name: DataBurst Python Semantic Versioning
17 | uses: mostafaghadimi/python-semantic-versioning@main  # NOTE(review): third-party action pinned to a moving branch, not a tag/SHA — consider pinning for reproducibility
18 | with:
19 | gh_token: ${{ secrets.GITHUB_TOKEN }}  # token used by the action to push tags/releases
20 | debug: true  # verbose action output
21 | 
--------------------------------------------------------------------------------
/examples/dags/sample_bash_operator_dag.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 |
3 | from airflow import DAG
4 | from airflow.providers.standard.operators.bash import BashOperator
5 |
6 | default_args = {
7 | "owner": "Mostafa Ghadimi",
8 | "description": "Use of the BashOperator",
9 | "depend_on_past": False,
10 | "start_date": datetime(2023, 4, 4),
11 | "email_on_failure": False,
12 | "email_on_retry": False,
13 | "retries": 1,
14 | "retry_delay": timedelta(minutes=5),
15 | }
16 |
17 | with DAG(
18 | "bash_dag_sample", default_args=default_args, schedule="5 10 * * *", catchup=False
19 | ) as dag:
20 | t1 = BashOperator(task_id="print_hello", bash_command='echo "hello world"')
21 |
22 | t1
23 |
--------------------------------------------------------------------------------
/examples/dags/sample_docker_operator_dag.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 |
3 | from airflow import DAG
4 | from airflow.providers.docker.operators.docker import DockerOperator
5 |
6 | default_args = {
7 | "owner": "Mostafa Ghadimi",
8 | "description": "Use of the DockerOperator",
9 | "depend_on_past": False,
10 | "start_date": datetime(2023, 4, 23),
11 | "email_on_failure": False,
12 | "email_on_retry": False,
13 | "retries": 1,
14 | "retry_delay": timedelta(minutes=5),
15 | }
16 |
17 | with DAG(
18 | "docker_dag_v2",
19 | default_args=default_args,
20 | schedule="5 10 * * *",
21 | catchup=False,
22 | ) as dag:
23 | t1 = DockerOperator(
24 | task_id="docker_command",
25 | image="alpine:latest",
26 | api_version="auto",
27 | auto_remove="success",
28 | command="/bin/sleep 30",
29 | docker_url="unix://var/run/docker.sock",
30 | network_mode="bridge",
31 | )
32 |
33 | t1
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 DataBurst
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to the Codebase 🚀
2 |
3 | This document shares some best practices and tips for working on our code base.
4 |
5 | ## Commit Message 📝
6 | Write a good commit message that follows this format:
7 |
8 | ```
9 | [optional emoji] [type]([optional scope]): [subject]
10 | [BLANK LINE]
11 | [optional body]
12 | ```
13 | Where:
14 |
15 | - `[optional emoji]` is an optional emoji that represents the type of change. You can use https://gitmoji.dev/ to find
16 | suitable emojis for your commits. For example, you can use :bug: for fix, :sparkles: for feat, :recycle: for refactor,
17 | etc.
18 | - `[type]` is one of these valid types: fix, feat, refactor, docs, test, chore. These types indicate what kind of change
19 | you made in your code. For example, fix means you fixed a bug, feat means you added a new feature, docs means you
20 | updated the documentation, etc.
21 | - `([optional scope])` is an optional scope that specifies what part of the code is affected by your change. For
22 | example, you can use (etl), (api), (db) to indicate which module or component you worked on.
23 | - `[subject]` is the message itself that briefly describes what changed and why. It should be concise and clear,
24 | preferably less than 50 characters. It should NOT start with a capital letter and should NOT end with a period. It
25 | should not explain how you changed the code, but what and why.
26 | - `[BLANK LINE]` is an empty line that separates the subject from the body.
27 | - `[optional body]` is an optional body that provides more details about your change. It should be written in
28 | paragraphs, each separated by a blank line. It should explain the problem you solved, how you solved it, and any other relevant information. It should wrap at 72 characters.
29 |
30 | For example:
31 |
32 | ```
33 | :sparkles: feat(etl): add new data source for customer segmentation
34 | ```
35 |
36 | If you have multiple commits in one merge request, you should squash them into a single commit with a suitable commit
37 | message when merging with the main branch. The commit message should summarize what changes you made in the merge
38 | request and why. It should also reference the issue that you resolved and include the ticket number from Jira. For
39 | example:
40 |
41 | ```
42 | :recycle: refactor(ansible): move dev machines group vars to the role vars #123
43 | ```
44 |
45 | This indicates that you resolved ticket number 123, which was about refactoring the variables of development machines.
46 |
47 |
48 | ## Branching Name Convention :twisted_rightwards_arrows:
49 |
50 | Before creating a feature or resolving a bug, it is required to create an issue on GitHub. This issue will be assigned a number, which should be included in the branch name, separated by hyphens (-). Here is an example of a valid Git branch name:
51 | ```bash
52 | 2-feature-add-contribution-documentation
53 | ```
54 |
55 | ## Feature Development 🛠️
56 |
57 | To write a new feature, follow these steps:
58 |
59 | 1. Create an issue in GitHub and explain what should be done or resolved.
60 | 1. Create a pull request and a corresponding branch for the issue. Assign the task and review to appropriate people.
61 | 1. Start committing in the specified branch. You can have separate changes in different commits in this branch. At the
62 | time of merging with the main branch, you should squash all of these commits into a single commit with an appropriate
63 | commit message for the whole merge request.
64 | 1. After committing, check that the CI pipeline passes; it lints the code and, for some components, runs the appropriate tests. If
65 | anything fails, fix it.
66 |
67 |
68 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 |
4 | ## v1.0.0 (2025-08-15)
5 |
6 | ### Features
7 |
8 | - Migrate docker compose and examples to the airflow 3 (#31)
9 | ([#31](https://github.com/data-burst/airflow-git-sync/pull/31),
10 | [`5155aec`](https://github.com/data-burst/airflow-git-sync/commit/5155aecf2d11ba1d5a2aba574a79b6a8a1bbe550))
11 |
12 |
13 | ## v0.1.0 (2025-08-15)
14 |
15 | ### Bug Fixes
16 |
17 | - Resolve the default repository address in the docker-compose file (#17)
18 | ([#17](https://github.com/data-burst/airflow-git-sync/pull/17),
19 | [`3078463`](https://github.com/data-burst/airflow-git-sync/commit/3078463686c472f11c6ee3e3fb46318921ff1af6))
20 |
21 | - Resolve the git-sync image in the documentation and reformat the docker-compose file to yaml (#16)
22 | ([#16](https://github.com/data-burst/airflow-git-sync/pull/16),
23 | [`a75681c`](https://github.com/data-burst/airflow-git-sync/commit/a75681cf60bd5822cf5560bf292211f6382720f5))
24 |
25 | - **entrypoint**: Resolve git clean issue and use global variable for the project directory (#1)
26 | ([#1](https://github.com/data-burst/airflow-git-sync/pull/1),
27 | [`3340a7e`](https://github.com/data-burst/airflow-git-sync/commit/3340a7eec83b288477138068bfad954551bfff22))
28 |
29 | - **github-action**: Add the working CI for pushing the latest tag to the Dockerhub
30 | ([`4a0ed1e`](https://github.com/data-burst/airflow-git-sync/commit/4a0ed1ed04d42b8a623e6ec249078723fa911f3c))
31 |
32 | ### Chores
33 |
34 | - Add .gitignore file to the repository
35 | ([`3ca7b66`](https://github.com/data-burst/airflow-git-sync/commit/3ca7b66085ced31ccfa025b8374361199fbbd764))
36 |
37 | - **docker**: Change the maintainer email in Dockerfile of Git sync
38 | ([`4ecbf9b`](https://github.com/data-burst/airflow-git-sync/commit/4ecbf9bb4220d5ac56762ec1f9f20f9e773b31d4))
39 |
40 | - **license**: Add MIT license file
41 | ([`00c6d58`](https://github.com/data-burst/airflow-git-sync/commit/00c6d58684652b7b86f323784aac8e54250368ea))
42 |
43 | ### Documentation
44 |
45 | - Add FAQ collapsible section to README (#21)
46 | ([#21](https://github.com/data-burst/airflow-git-sync/pull/21),
47 | [`0ed1461`](https://github.com/data-burst/airflow-git-sync/commit/0ed1461e22be1a8a82dfdab729fa32efb6dc5d49))
48 |
49 | - Add GIT_PULL_REBASE parameter explanation to the documentation and set the required/optional
50 | fields (#24) ([#24](https://github.com/data-burst/airflow-git-sync/pull/24),
51 | [`146b055`](https://github.com/data-burst/airflow-git-sync/commit/146b05599861f2a3e04d346afb1e66d5adda748c))
52 |
53 | - Add instruction for setting right Airflow user and create directories (#2) (#4)
54 | ([#4](https://github.com/data-burst/airflow-git-sync/pull/4),
55 | [`2e690b5`](https://github.com/data-burst/airflow-git-sync/commit/2e690b5494531a076147337318e9eef18006427c))
56 |
57 | - **contribution**: Add contribution guide
58 | ([`038d5f1`](https://github.com/data-burst/airflow-git-sync/commit/038d5f1332452e443e6e0c2ac0908bec106d4ebb))
59 |
60 | - **emoji**: Resolve the space between character and emoji
61 | ([`2b4bc8b`](https://github.com/data-burst/airflow-git-sync/commit/2b4bc8b2789cc3898684ec52392f78b31b32447b))
62 |
63 | - **git-sync**: Add Kubernetes Git Sync sidecar container link to its repository in the
64 | documentation
65 | ([`859d3db`](https://github.com/data-burst/airflow-git-sync/commit/859d3dbf7b93e0303badc30a30a54f63f9d361fd))
66 |
67 | - **links**: Fix contribution link and license link
68 | ([`42122ac`](https://github.com/data-burst/airflow-git-sync/commit/42122ac734baabe8e0fe1e2d726f4d365b52ec2c))
69 |
70 | - **readme**: Add a part in project usage about how to add ssh key and handle it on github (#3) (#8)
71 | ([#8](https://github.com/data-burst/airflow-git-sync/pull/8),
72 | [`99a933c`](https://github.com/data-burst/airflow-git-sync/commit/99a933c16b9fab966cf55cfbdeebda748871e944))
73 |
74 | - **readme-project-usage**: Fix project usage part by removing extra lines
75 | ([`cc553f6`](https://github.com/data-burst/airflow-git-sync/commit/cc553f6c44c144e6f4e8aed25c149de8214ccfa8))
76 |
77 | - **README.md**: Improve documentation to make its usage more sensible (#9)
78 | ([#9](https://github.com/data-burst/airflow-git-sync/pull/9),
79 | [`4ec528e`](https://github.com/data-burst/airflow-git-sync/commit/4ec528e41638fc4625020feab2812594b8d05938))
80 |
81 | ### Features
82 |
83 | - Add semantic versioning workflow (#30)
84 | ([#30](https://github.com/data-burst/airflow-git-sync/pull/30),
85 | [`156326a`](https://github.com/data-burst/airflow-git-sync/commit/156326a21d31bfd8196382021e6858f93a0c8c06))
86 |
87 | - Remove unused GIT_URL variable from Docker Compose file and README (#7)
88 | ([#7](https://github.com/data-burst/airflow-git-sync/pull/7),
89 | [`4ed3a18`](https://github.com/data-burst/airflow-git-sync/commit/4ed3a181b25de86007561a25e2d4172567b91528))
90 |
91 | - **compose**: Add docker-compose file in order to setup Airflow and Git-sync
92 | ([`ba357d6`](https://github.com/data-burst/airflow-git-sync/commit/ba357d66d0544c66dce3769640878ecda68c9502))
93 |
94 | - **compose**: Add image instead of context in order to prevent from building the image from scratch
95 | ([`5959b90`](https://github.com/data-burst/airflow-git-sync/commit/5959b9053681c4a6dbaf256d948f8d4a350bcbfe))
96 |
97 | - **docker**: Add Dockerfile for Git-Sync image
98 | ([`d103906`](https://github.com/data-burst/airflow-git-sync/commit/d103906e603e8679ea06ef3303b3121d8fedea23))
99 |
100 | - **document**: Add README.md file and images
101 | ([`285781f`](https://github.com/data-burst/airflow-git-sync/commit/285781f43e37782d7afe573943a327ebc9ee60ef))
102 |
103 | - **examples**: Add example DAGs directory for BashOperator and DockerOperator
104 | ([`5433e78`](https://github.com/data-burst/airflow-git-sync/commit/5433e78d8e62f48d5819dadaf598fb2b9e137bbd))
105 |
106 | - **github-actions**: Add CI for building and pushing Docker image to Dockerhub
107 | ([`d54827d`](https://github.com/data-burst/airflow-git-sync/commit/d54827d1a13802bd9c9278d5a9e71345eb250a45))
108 |
109 | - **script**: Add entrypoint script for building Docker image
110 | ([`18d9733`](https://github.com/data-burst/airflow-git-sync/commit/18d973340dd704c4b4ddd67a9b80cd75fe723895))
111 |
112 | ### Refactoring
113 |
114 | - Change the default value of REPO_URL in docker-compose file and change it in README file (#13)
115 | ([#13](https://github.com/data-burst/airflow-git-sync/pull/13),
116 | [`891e84b`](https://github.com/data-burst/airflow-git-sync/commit/891e84be0b6d76dfab44d987b3dd9d8001f0ca0f))
117 |
118 | - Separate git-sync and airflow-git-sync repositories (#19)
119 | ([#19](https://github.com/data-burst/airflow-git-sync/pull/19),
120 | [`45c55ec`](https://github.com/data-burst/airflow-git-sync/commit/45c55ec348e9af37ac0bc9332a7a0d2cc92c3788))
121 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Airflow Git Sync
2 |
3 | 🎉 What if you could instantly sync DAG changes from **Git** to **Airflow**? Well now you can!
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | ## Table of Contents 🏗️
19 |
20 | - [Airflow Git Sync](#airflow-git-sync)
21 | - [Table of Contents 🏗️](#table-of-contents-️)
22 | - [Project Description 🌱](#project-description-)
23 | - [Project Usage 🧑💻](#project-usage-)
24 | - [Setting Up Your SSH Key](#setting-up-your-ssh-key)
25 | - [Using Airflow Git Sync](#using-airflow-git-sync)
26 | - [FAQ ❓](#faq-)
27 | - [Contributing 👥](#contributing-)
28 | - [License 📄](#license-)
29 |
30 | ## Project Description 🌱
31 |
32 | Airflow Git Sync provides automated DAG deployments from Git for Airflow environments. It syncs your pipeline code from a Git repository into the Airflow DAG folder.
33 |
34 | Keeping DAGs directly in Airflow servers makes management challenging. Code changes require manual syncing to containers. There is no version control or history.
35 | If you have ever worked with Airflow on Kubernetes, it gives you the ability to sync the DAGs with your repository (as a GitOps solution) using the [git-sync sidecar container](https://github.com/kubernetes/git-sync). If you don't have Kubernetes, it is hard to keep the DAGs directory of Airflow (which is placed at `/opt/airflow/dags/`) synced with the changes you applied to your DAGs, and in some cases it is required to restart the Airflow service or container.
36 |
37 | The project introduces `git-sync` application alongside Airflow. This handles cloning your configured DAG Git repository and syncing contents over to Airflow's DAG directory.
38 |
39 | The syncing is achieved via a lightweight Docker container that runs periodically using inotify wait to detect file changes. The container can be deployed using docker-compose alongside Airflow. Here is a bit of the docker-compose file:
40 |
41 | ```yaml
42 | airflow-webserver:
43 | # Airflow container
44 |
45 | airflow-scheduler:
46 | # Airflow container
47 |
48 | git-sync:
49 | # Git-sync container
50 | image: databurst/git-sync:latest
51 | environment:
52 | REPO_URL: git@github.com:<your-org>/<your-repo>.git
53 | #...other config
54 | ```
55 |
56 | The git-sync container will keep DAGs in Airflow containers continually synced from files committed to the Git repository.
57 |
58 | The end result is Airflow DAGs can be managed via Git best practices. Changes are automatically reflected in your pipeline deployment. No need for complex Kubernetes just to get basic Git sync!
59 |
60 |
61 |
62 |
63 |
64 | ## Project Usage 🧑💻
65 |
66 | **Important Tip**:
67 |
68 | Before you can use the project, based on [Airflow's documentation](https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user), you need to ensure that Airflow has the correct permissions for the required directories. To do this, execute the following commands in the directory where your `docker-compose.yaml` file is located:
69 |
70 | ```bash
71 | mkdir -p ./dags ./logs ./plugins ./config
72 | echo -e "AIRFLOW_UID=$(id -u)" > .env
73 | ```
74 |
75 | ### Setting Up Your SSH Key
76 |
77 | 1. **Generate an SSH Key**: If you don't already have an SSH key, you can generate one using the following steps:
78 |
79 | - Open a Terminal: Open your terminal or command prompt.
80 |
81 | - Generate SSH Key: Run the following command to generate a new SSH key:
82 |
83 | ```bash
84 | ssh-keygen -t <key_type>
85 | # example
86 | # ssh-keygen -t ed25519
87 | ```
88 | 
89 | Replace `<key_type>` with the desired key type (e.g., `ed25519`, `rsa`).
90 |
91 | - Follow Prompts: You'll be prompted to choose a location for your SSH key. Press Enter to accept the default location (usually `~/.ssh/id_<key_type>`) or specify a different one.
92 |
93 | 2. **Adding SSH Key to Your Git Account**: To use your SSH key with Git, you need to add your public key to your Git account. Here's how:
94 |
95 | - Go to your Git account settings on the web (e.g., GitHub, GitLab).
96 |
97 | - Navigate to "SSH and GPG keys" or a similar section.
98 |
99 | - Click "New SSH key" or equivalent.
100 |
101 | - Paste your public key into the provided field and give it a meaningful title.
102 |
103 | 3. **Updating Docker Compose**: To ensure that your SSH key is correctly mounted in the `git-sync` container, modify the relevant line in your `docker-compose.yaml` file as follows:
104 |
105 | ```yaml
106 | - ${GIT_SSH_KEY:-~/.ssh/}:/root/.ssh/
107 | ```
108 | ### Using Airflow Git Sync
109 |
110 | Using Airflow Git Sync is simple:
111 |
112 | 1. Clone the repository.
113 | 2. Configure git-sync via environment variables in `docker-compose.yaml` file:
114 |
115 | | Variable | Description | Default Value |
116 | | --- | --- | --- |
117 | | `REPO_URL` | The URL of the Git repository to sync | `git@github.com:data-burst/airflow_git_sync.git` (required) |
118 | | `GIT_URL` | The URL of the Git remote (needed to add host fingerprint to known_hosts) | `github.com` (required) |
119 | | `SUBFOLDER_PATH` | The repository sub-folder to sync. Leaving empty copies the entire repo | `N/A` (optional) |
120 | | `GIT_BRANCH` | The Git branch to sync | `main` (optional) |
121 | | `DIRECTORY_NAME` | The name of the directory to clone the repository into | `project` (optional) |
122 | | `DESTINATION_PATH` | The path to sync the repository to | `/app/sync` (optional) |
123 | | `INTERVAL` | The interval (in seconds) to sync the repository | `10` (optional)|
124 | | `GIT_PULL_REBASE` | Determines the Git pull strategy. If set to `true`, it configures `git config pull.rebase` to use `rebase` during pulls. If `false`, it defaults to `merge`. | `false` (optional)|
125 |
126 | 3. In order to deploy the Airflow with the configured Git-sync, simply run the `docker compose up -d` command.
127 | 4. Enjoy!
128 |
129 | ## FAQ ❓
130 |
131 | In this section you can find solutions to common issues you may face.
132 |
133 |
134 | <details>
135 | <summary>Internet Connection Issue</summary>
136 | 
137 | If you've seen the following error in the output of the `docker logs -f <container_name>` command, the probable root cause is that you are connected to a VPN!
138 | 
139 | > getaddrinfo github.com: Try again
140 | > ssh: Could not resolve hostname github.com: Try again
141 | > fatal: Could not read from remote repository.
142 | 
143 | For more information, check out [this](https://stackoverflow.com/questions/20430371/my-docker-container-has-no-internet) link.
144 | 
145 | </details>
146 | 
147 | ## Contributing 👥
148 |
149 | We welcome contributions to this repository! If you’re interested in contributing, please take a look at our [CONTRIBUTING.md](CONTRIBUTING.md) file for more information on how to get started. We look forward to collaborating with you!
150 |
151 | ## License 📄
152 |
153 | This repository is licensed under the MIT License, which is a permissive open-source license that allows for reuse and modification of the code with few restrictions. You can find the full text of the license in [this](LICENSE) file.
154 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | #
18 |
19 | # Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
20 | #
21 | # WARNING: This configuration is for local development. Do not use it in a production deployment.
22 | #
23 | # This configuration supports basic configuration using environment variables or an .env file
24 | # The following variables are supported:
25 | #
26 | # AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow.
27 | # Default: apache/airflow:3.0.4
28 | # AIRFLOW_UID - User ID in Airflow containers
29 | # Default: 50000
30 | # AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed.
31 | # Default: .
32 | # Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode
33 | #
34 | # _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested).
35 | # Default: airflow
36 | # _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested).
37 | # Default: airflow
38 | # _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
39 | # Use this option ONLY for quick checks. Installing requirements at container
40 | # startup is done EVERY TIME the service is started.
41 | # A better way is to build a custom image or extend the official image
42 | # as described in https://airflow.apache.org/docs/docker-stack/build.html.
43 | # Default: ''
44 | #
45 | # Feel free to modify this file to suit your needs.
46 | ---
47 | x-airflow-common:
48 | &airflow-common
49 | # In order to add custom dependencies or upgrade provider distributions you can use your extended image.
50 | # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
51 | # and uncomment the "build" line below, Then run `docker-compose build` to build the images.
52 | image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:3.0.4}
53 | # build: .
54 | environment:
55 | &airflow-common-env
56 | AIRFLOW__CORE__EXECUTOR: CeleryExecutor
57 | AIRFLOW__CORE__AUTH_MANAGER: airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager
58 | AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
59 | AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
60 | AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
61 | AIRFLOW__CORE__FERNET_KEY: ''
62 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
63 | AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
64 | AIRFLOW__CORE__EXECUTION_API_SERVER_URL: 'http://airflow-apiserver:8080/execution/'
65 | # yamllint disable rule:line-length
66 | # Use simple http server on scheduler for health checks
67 | # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server
68 | # yamllint enable rule:line-length
69 | AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
70 | # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks
71 | # for other purpose (development, test and especially production usage) build/extend Airflow image.
72 | _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
73 | # The following line can be used to set a custom config file, stored in the local config folder
74 | AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg'
75 | volumes:
76 | - ./project_data:/opt/airflow/dags
77 | - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
78 | - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
79 | - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
80 | user: "${AIRFLOW_UID:-50000}:0"
81 | depends_on:
82 | &airflow-common-depends-on
83 | redis:
84 | condition: service_healthy
85 | postgres:
86 | condition: service_healthy
87 |
88 | services:
89 | postgres:
90 | image: postgres:13
91 | environment:
92 | POSTGRES_USER: airflow
93 | POSTGRES_PASSWORD: airflow
94 | POSTGRES_DB: airflow
95 | volumes:
96 | - postgres-db-volume:/var/lib/postgresql/data
97 | healthcheck:
98 | test: ["CMD", "pg_isready", "-U", "airflow"]
99 | interval: 10s
100 | retries: 5
101 | start_period: 5s
102 | restart: always
103 |
104 | redis:
105 | # Redis is limited to 7.2-bookworm due to licencing change
106 | # https://redis.io/blog/redis-adopts-dual-source-available-licensing/
107 | image: redis:7.2-bookworm
108 | expose:
109 | - 6379
110 | healthcheck:
111 | test: ["CMD", "redis-cli", "ping"]
112 | interval: 10s
113 | timeout: 30s
114 | retries: 50
115 | start_period: 30s
116 | restart: always
117 |
118 | airflow-apiserver:
119 | <<: *airflow-common
120 | command: api-server
121 | ports:
122 | - "8080:8080"
123 | healthcheck:
124 | test: ["CMD", "curl", "--fail", "http://localhost:8080/api/v2/version"]
125 | interval: 30s
126 | timeout: 10s
127 | retries: 5
128 | start_period: 30s
129 | restart: always
130 | depends_on:
131 | <<: *airflow-common-depends-on
132 | airflow-init:
133 | condition: service_completed_successfully
134 |
135 | airflow-scheduler:
136 | <<: *airflow-common
137 | command: scheduler
138 | healthcheck:
139 | test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
140 | interval: 30s
141 | timeout: 10s
142 | retries: 5
143 | start_period: 30s
144 | restart: always
145 | depends_on:
146 | <<: *airflow-common-depends-on
147 | airflow-init:
148 | condition: service_completed_successfully
149 |
150 | airflow-dag-processor:
151 | <<: *airflow-common
152 | command: dag-processor
153 | healthcheck:
154 | test: ["CMD-SHELL", 'airflow jobs check --job-type DagProcessorJob --hostname "$${HOSTNAME}"']
155 | interval: 30s
156 | timeout: 10s
157 | retries: 5
158 | start_period: 30s
159 | restart: always
160 | depends_on:
161 | <<: *airflow-common-depends-on
162 | airflow-init:
163 | condition: service_completed_successfully
164 |
165 | airflow-worker:
166 | <<: *airflow-common
167 | command: celery worker
168 | healthcheck:
169 | # yamllint disable rule:line-length
170 | test:
171 | - "CMD-SHELL"
172 | - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
173 | interval: 30s
174 | timeout: 10s
175 | retries: 5
176 | start_period: 30s
177 | environment:
178 | <<: *airflow-common-env
179 | # Required to handle warm shutdown of the celery workers properly
180 | # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
181 | DUMB_INIT_SETSID: "0"
182 | restart: always
183 | depends_on:
184 | <<: *airflow-common-depends-on
185 | airflow-apiserver:
186 | condition: service_healthy
187 | airflow-init:
188 | condition: service_completed_successfully
189 |
190 | airflow-triggerer:
191 | <<: *airflow-common
192 | command: triggerer
193 | healthcheck:
194 | test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
195 | interval: 30s
196 | timeout: 10s
197 | retries: 5
198 | start_period: 30s
199 | restart: always
200 | depends_on:
201 | <<: *airflow-common-depends-on
202 | airflow-init:
203 | condition: service_completed_successfully
204 |
  # One-shot initialization container: warns about insufficient host resources,
  # creates missing mount-point directories, fixes their ownership, and (via the
  # environment below) migrates the metadata DB and creates the first web user.
  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    # yamllint disable rule:line-length
    # NOTE(review): in the script below, `$$…` is escaped for the container shell,
    # while `${AIRFLOW_UID}` (single $) is interpolated by docker compose at parse
    # time. The `export AIRFLOW_UID=$$(id -u)` fallback therefore cannot affect
    # the later `chown "${AIRFLOW_UID}:0"` lines when AIRFLOW_UID is unset in
    # .env — confirm this matches the upstream Airflow compose file's intent.
    command:
      - -c
      - |
        if [[ -z "${AIRFLOW_UID}" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
          echo "If you are on Linux, you SHOULD follow the instructions below to set "
          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
          echo "For other operating systems you can get rid of the warning with manually created .env file:"
          echo "    See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user"
          echo
          export AIRFLOW_UID=$$(id -u)
        fi
        one_meg=1048576
        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
        disk_available=$$(df / | tail -1 | awk '{print $$4}')
        warning_resources="false"
        if (( mem_available < 4000 )) ; then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
          echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
          echo
          warning_resources="true"
        fi
        if (( cpus_available < 2 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
          echo "At least 2 CPUs recommended. You have $${cpus_available}"
          echo
          warning_resources="true"
        fi
        if (( disk_available < one_meg * 10 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
          echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
          echo
          warning_resources="true"
        fi
        if [[ $${warning_resources} == "true" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
          echo "Please follow the instructions to increase amount of resources available:"
          echo "   https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin"
          echo
        fi
        echo
        echo "Creating missing opt dirs if missing:"
        echo
        mkdir -v -p /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Airflow version:"
        /entrypoint airflow version
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Running airflow config list to create default config file if missing."
        echo
        /entrypoint airflow config list >/dev/null
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Change ownership of files in /opt/airflow to ${AIRFLOW_UID}:0"
        echo
        chown -R "${AIRFLOW_UID}:0" /opt/airflow/
        echo
        echo "Change ownership of files in shared volumes to ${AIRFLOW_UID}:0"
        echo
        chown -v -R "${AIRFLOW_UID}:0" /opt/airflow/{logs,dags,plugins,config}
        echo
        echo "Files in shared volumes:"
        echo
        ls -la /opt/airflow/{logs,dags,plugins,config}

    # yamllint enable rule:line-length
    environment:
      <<: *airflow-common-env
      # Run DB migrations and create the initial web user on startup; the
      # username/password default to airflow/airflow unless overridden in .env.
      _AIRFLOW_DB_MIGRATE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
      _PIP_ADDITIONAL_REQUIREMENTS: ''
    # Run as root so the script above can chown the shared volumes.
    user: "0:0"
296 |
  # Debug-only helper for running arbitrary `airflow` CLI commands, e.g.
  # `docker compose run airflow-cli airflow info`. Enabled with `--profile debug`.
  airflow-cli:
    <<: *airflow-common
    profiles:
      - debug
    environment:
      <<: *airflow-common-env
      # Setting the check count to 0 disables the entrypoint's DB-connection
      # wait loop (see the Airflow docker-stack entrypoint docs).
      CONNECTION_CHECK_MAX_COUNT: "0"
    # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
    command:
      - bash
      - -c
      - airflow
    depends_on:
      <<: *airflow-common-depends-on
311 |
  # Flower: web UI for monitoring the Celery workers, served on port 5555.
  # You can enable flower by adding the "--profile flower" option, e.g. docker-compose --profile flower up,
  # or by explicitly targeting it on the command line, e.g. docker-compose up flower.
  # See: https://docs.docker.com/compose/profiles/
  flower:
    <<: *airflow-common
    command: celery flower
    profiles:
      - flower
    ports:
      - "5555:5555"
    healthcheck:
      # Healthy when the Flower web UI answers HTTP requests.
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully
333 |
334 | git-sync:
335 | image: databurst/git-sync:latest
336 | volumes:
337 | - ./project_data:${DESTINATION_PATH:-/app/sync}
338 | - ${GIT_SSH_KEY:-~/.ssh/id_ed25519}:/root/.ssh/id_ed25519
339 | environment:
340 | REPO_URL: ${REPO_URL:-git@github.com:data-burst/airflow-git-sync.git}
341 | GIT_BRANCH: ${GIT_BRANCH:-main}
342 | DIRECTORY_NAME: ${DIRECTORY_NAME:-project}
343 | DESTINATION_PATH: ${DESTINATION_PATH:-/app/sync}
344 | INTERVAL: ${INTERVAL:-10}
# Named volume backing the Postgres data directory (Airflow metadata DB).
volumes:
  postgres-db-volume:
347 |
--------------------------------------------------------------------------------