├── .coveragerc ├── .dlc.json ├── .flake8 ├── .github ├── codecov.yml └── workflows │ ├── ci.yaml │ └── pypi.yaml ├── .gitignore ├── .isort.cfg ├── .licenserc.yaml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── RELEASE.md ├── docs ├── Makefile ├── _static │ └── how-it-work.png ├── arch.rst ├── changelog.rst ├── cli.rst ├── conf.py ├── fake │ ├── hooks.rst │ ├── index.rst │ └── models.rst ├── howto │ ├── custom-rules.rst │ ├── filter.rst │ ├── index.rst │ └── migrate-inplace.rst ├── index.rst ├── make.bat └── start.rst ├── examples ├── airflow │ ├── bash.py │ ├── python.py │ └── tutorial.py ├── libcst │ ├── metadata.py │ └── visit_transform.py └── transfer_tutorial.py ├── setup.cfg ├── setup.py ├── src └── air2phin │ ├── __init__.py │ ├── cli │ ├── __init__.py │ └── command.py │ ├── constants.py │ ├── core │ ├── __init__.py │ ├── rules │ │ ├── __init__.py │ │ ├── config.py │ │ └── loader.py │ └── transformer │ │ ├── __init__.py │ │ ├── imports.py │ │ ├── operators.py │ │ └── route.py │ ├── fake │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── connection.py │ │ └── hook.py │ ├── hooks │ │ ├── __init__.py │ │ ├── mysql.py │ │ └── postgres.py │ ├── models │ │ ├── __init__.py │ │ └── variable.py │ └── utils │ │ ├── __init__.py │ │ └── trigger_rule.py │ ├── rules │ ├── core │ │ ├── dagContext.yaml │ │ └── removeModule.yaml │ ├── hooks │ │ ├── MySqlHook.yaml │ │ └── PostgresHook.yaml │ ├── models │ │ └── Variable.yaml │ ├── operators │ │ ├── BashOperator.yaml │ │ ├── DummyOperator.yaml │ │ ├── PostgreOperator.yaml │ │ ├── PythonOperator.yaml │ │ └── SparkSqlOperator.yaml │ └── utils │ │ └── TriggerRule.yaml │ ├── runner.py │ └── utils │ ├── __init__.py │ ├── file.py │ └── string.py ├── tests ├── __init__.py ├── cli │ ├── __init__.py │ └── test_command.py └── rules │ ├── EdgeCases.yaml │ ├── __init__.py │ ├── test_example.py │ └── test_rules.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [run] 19 | command_line = -m pytest 20 | 21 | [report] 22 | # Don’t report files that are 100% covered 23 | skip_covered = True 24 | show_missing = True 25 | precision = 2 26 | -------------------------------------------------------------------------------- /.dlc.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [ 3 | { 4 | "pattern": "^https://img.shields.io/badge" 5 | } 6 | ], 7 | "httpHeaders": [ 8 | { 9 | "urls": ["https://docs.github.com/"], 10 | "headers": { 11 | "Accept-Encoding": "zstd, br, gzip, deflate" 12 | } 13 | } 14 | ], 15 | "timeout": "10s", 16 | "retryOn429": true, 17 | "retryCount": 10, 18 | "fallbackRetryDelay": "1000s", 19 | "aliveStatusCodes": [ 20 | 200 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [flake8] 19 | max-line-length = 110 20 | exclude = 21 | .git, 22 | __pycache__, 23 | .pytest_cache, 24 | *.egg-info, 25 | build, 26 | dist, 27 | .tox, 28 | examples, 29 | tests, 30 | ignore = 31 | # D107: Don't require docstrings on __init__ 32 | D107, 33 | # D105: Missing docstring in magic method 34 | D105, 35 | # D104: Missing docstring in public package 36 | D104, 37 | # D100: Missing docstring in public module 38 | D100, 39 | # W503: Line breaks before binary operators 40 | W503, 41 | # First line should end with a period 42 | D400, 43 | # First line should be in imperative mood 44 | D401, 45 | per-file-ignores = 46 | # ignore libcst ``visit_*`` or ``leave_*`` 47 | */air2phin/core/transformer/*.py:D102 48 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Configuration file for codecov.io, https://docs.codecov.io/docs/codecovyml-reference 19 | 20 | codecov: 21 | branch: main 22 | bot: "codecov-io" 23 | ci: 24 | - "github.com" 25 | max_report_age: 24 26 | disable_default_path_fixes: no 27 | require_ci_to_pass: yes 28 | notify: 29 | after_n_builds: 1 30 | wait_for_ci: yes 31 | 32 | coverage: 33 | precision: 2 34 | round: down 35 | range: "70...100" 36 | status: 37 | project: 38 | default: 39 | target: auto 40 | threshold: 0% -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: CI 17 | 18 | on: 19 | push: 20 | branches: 21 | - main 22 | pull_request: 23 | 24 | concurrency: 25 | group: ci-${{ github.event.pull_request.number || github.ref }} 26 | cancel-in-progress: true 27 | 28 | # We have to update setuptools wheel to package with package_data, LICENSE, NOTICE 29 | env: 30 | DEPENDENCES: pip setuptools wheel tox 31 | 32 | jobs: 33 | license: 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: apache/skywalking-eyes/header@main 37 | dead-link: 38 | runs-on: ubuntu-latest 39 | needs: license 40 | timeout-minutes: 30 41 | steps: 42 | - uses: actions/checkout@v3 43 | - run: sudo npm install -g markdown-link-check@3.10.0 44 | - run: | 45 | for file in $(find . 
-name "*.md"); do 46 | markdown-link-check -c .dlc.json -q "$file" 47 | done 48 | docs: 49 | runs-on: ubuntu-latest 50 | needs: license 51 | timeout-minutes: 15 52 | steps: 53 | - uses: actions/checkout@v3 54 | - name: Set up Python 3.7 55 | uses: actions/setup-python@v4 56 | with: 57 | python-version: 3.7 58 | - name: Install Dependences 59 | run: | 60 | python -m pip install --upgrade ${{ env.DEPENDENCES }} 61 | - name: Run Build Docs Tests ${{ matrix.env-list }} 62 | run: | 63 | python -m tox -vv -e doc-build 64 | lint: 65 | timeout-minutes: 15 66 | runs-on: ubuntu-latest 67 | needs: license 68 | steps: 69 | - uses: actions/checkout@v3 70 | - name: Set up Python 3.7 71 | uses: actions/setup-python@v4 72 | with: 73 | python-version: 3.7 74 | - name: Install Dependences 75 | run: | 76 | python -m pip install --upgrade ${{ env.DEPENDENCES }} 77 | - name: Run All Lint Check 78 | run: | 79 | python -m tox -vv -e lint 80 | pytest: 81 | timeout-minutes: 15 82 | needs: lint 83 | runs-on: ${{ matrix.os }} 84 | strategy: 85 | fail-fast: false 86 | matrix: 87 | # YAML parse `3.10` to `3.1`, so we have to add quotes for `'3.10'`, see also: 88 | # https://github.com/actions/setup-python/issues/160#issuecomment-724485470 89 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] 90 | os: [ubuntu-latest, macOS-latest, windows-latest] 91 | # FIXME: python 3.6 can not find in ubuntu-latest due to https://github.com/actions/setup-python/issues/162#issuecomment-1325307787 92 | include: 93 | - python-version: '3.6' 94 | os: ubuntu-20.04 95 | - python-version: '3.6' 96 | os: macOS-latest 97 | - python-version: '3.6' 98 | os: windows-latest 99 | steps: 100 | - uses: actions/checkout@v3 101 | - name: Set up Python ${{ matrix.python-version }} 102 | uses: actions/setup-python@v4 103 | with: 104 | python-version: ${{ matrix.python-version }} 105 | - name: Install Dependences 106 | run: | 107 | python -m pip install --upgrade ${{ env.DEPENDENCES }} 108 | - name: Run All Tests 109 | run: | 110 | python -m tox -vv -e code-test 111 | - uses: codecov/codecov-action@v3 112 | # Codecov have a 100-upload limit per commit, and there are 3 * 6 files upload each time run pytest, 113 | # We should not run upload in schedule GitHub event, because the sixth day we do not change our code 114 | # and the upload limit will be reached 3 * 6 * 6. 
For more detail can see: 115 | # https://community.codecov.com/t/ci-failure-due-to-too-many-uploads-to-this-commit/2587/7 116 | if: ${{ github.event_name != 'schedule' }} 117 | with: 118 | token: ${{ secrets.CODECOV_TOKEN }} 119 | files: ./coverage.xml 120 | flags: unittests 121 | name: codecov-umbrella 122 | fail_ci_if_error: false 123 | verbose: true 124 | local-ci: 125 | timeout-minutes: 15 126 | needs: 127 | - pytest 128 | runs-on: ubuntu-latest 129 | steps: 130 | - uses: actions/checkout@v3 131 | - name: Set up Python 3.7 132 | uses: actions/setup-python@v4 133 | with: 134 | python-version: 3.7 135 | - name: Install Dependences 136 | run: | 137 | python -m pip install --upgrade ${{ env.DEPENDENCES }} 138 | - name: Run Tests Build Docs 139 | run: | 140 | python -m tox -vv -e local-ci 141 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yaml: -------------------------------------------------------------------------------- 1 | name: pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - "**" 7 | 8 | env: 9 | DEPENDENCES: pip setuptools wheel build 10 | 11 | jobs: 12 | publish: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.8" 19 | architecture: "x64" 20 | - name: Install Dependencies 21 | run: | 22 | python -m pip install --upgrade ${{ env.DEPENDENCES }} 23 | - run: python -m build 24 | - name: Publish package to PyPI 25 | uses: pypa/gh-action-pypi-publish@release/v1 26 | with: 27 | password: ${{ secrets.PYPI_API_TOKEN }} 28 | verbose: true 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editor 2 | .idea/ 3 | .vscode/ 4 | 5 | # Cache 6 | __pycache__/ 7 | .tox/ 8 | .pytest_cache/ 9 | .DS_Store 10 | _build/ 11 | 12 | # Build 13 | build/ 14 | dist/ 15 | *egg-info/ 16 | 17 | # Test coverage 18 | .coverage 19 | coverage.xml 20 | htmlcov/ 21 | 22 | # Git patch 23 | *.patch 24 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [settings] 19 | profile=black 20 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to Apache Software Foundation (ASF) under one or more contributor 2 | # license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright 4 | # ownership. Apache Software Foundation (ASF) licenses this file to you under 5 | # the Apache License, Version 2.0 (the "License"); you may 6 | # not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | header: 19 | license: 20 | spdx-id: Apache-2.0 21 | copyright-owner: Apache Software Foundation 22 | 23 | paths-ignore: 24 | - LICENSE 25 | - 'dist' 26 | - '**/*.md' 27 | - '**/*.md' 28 | - '.github/**' 29 | 30 | comment: on-failure 31 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # See https://pre-commit.com for more information 19 | # See https://pre-commit.com/hooks.html for more hooks 20 | 21 | default_stages: [commit, push] 22 | default_language_version: 23 | # force all python hooks to run python3 24 | python: python3 25 | repos: 26 | # Python API Hooks 27 | - repo: https://github.com/pycqa/isort 28 | rev: 5.10.1 29 | hooks: 30 | - id: isort 31 | name: isort (python) 32 | - repo: https://github.com/psf/black 33 | rev: 22.3.0 34 | hooks: 35 | - id: black 36 | - repo: https://github.com/pycqa/flake8 37 | rev: 3.8.0 38 | hooks: 39 | - id: flake8 40 | pass_filenames: false 41 | additional_dependencies: [ 42 | 'flake8-docstrings>=1.6', 43 | 'flake8-black>=0.2', 44 | ] 45 | # pre-commit run in the root, so we have to point out the full path of configuration 46 | args: [ 47 | --config, 48 | .flake8 49 | ] 50 | - repo: https://github.com/pycqa/autoflake 51 | rev: v1.4 52 | hooks: 53 | - id: autoflake 54 | args: [ 55 | --remove-all-unused-imports, 56 | --ignore-init-module-imports, 57 | --in-place 58 | ] 59 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.9" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # If using Sphinx, optionally build your docs in additional formats such as PDF 19 | # formats: 20 | # - pdf 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - method: pip 26 | path: . 27 | extra_requirements: 28 | - dev 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Air2phin

[![PyPi Version](https://img.shields.io/pypi/v/air2phin.svg?style=flat-square&logo=PyPi)](https://pypi.org/project/air2phin/)
[![PyPi Python Versions](https://img.shields.io/pypi/pyversions/air2phin.svg?style=flat-square&logo=python)](https://pypi.org/project/air2phin/)
[![PyPi License](https://img.shields.io/:license-Apache%202-blue.svg?style=flat-square)](https://raw.githubusercontent.com/WhaleOps/air2phin/main/LICENSE)
[![PyPi Status](https://img.shields.io/pypi/status/air2phin.svg?style=flat-square)](https://pypi.org/project/air2phin/)
[![Downloads](https://pepy.tech/badge/air2phin/month)](https://pepy.tech/project/air2phin)
[![Coverage Status](https://img.shields.io/codecov/c/github/WhaleOps/air2phin/main.svg?style=flat-square)](https://codecov.io/github/WhaleOps/air2phin?branch=main)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black)
[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat-square&labelColor=ef8336)](https://pycqa.github.io/isort)
[![CI](https://github.com/WhaleOps/air2phin/actions/workflows/ci.yaml/badge.svg)](https://github.com/WhaleOps/air2phin/actions/workflows/ci.yaml)
[![Documentation Status](https://readthedocs.org/projects/air2phin/badge/?version=latest)](https://air2phin.readthedocs.io/en/latest/?badge=latest)

air2phin is a tool for migrating Airflow DAGs to DolphinScheduler Python API.

## Installation

For now, it is only for testing and has not been published to PyPI yet, but it will be in the future.
You can still install it locally by yourself.

```shell
python -m pip install --upgrade air2phin
```

## Quick Start

Here is a quick example showing how to migrate DAG code from standard input.

```shell
# Quickly test the migration rules against standard input
# You can also add the option `--diff` to see the detailed diff of this migration
air2phin test "from airflow.operators.bash import BashOperator

test = BashOperator(
    task_id='test',
    bash_command='echo 1',
)
"
```

And you will see the migrated result in the standard output. air2phin can migrate not only standard input, but
also files and directories, and it can even be used from your Python code. For more detail, please see [our usage](https://air2phin.readthedocs.io/en/latest/start.html#usage).

## Documentation

The documentation is hosted on Read the Docs and is available at [https://air2phin.readthedocs.io](https://air2phin.readthedocs.io).

## Support Statement

For now, the following statements from Airflow's DAG files are supported

### DAG

| Before Migration             | After Migration                                                             |
|------------------------------|-----------------------------------------------------------------------------|
| `from airflow import DAG`    | `from pydolphinscheduler.core.process_definition import ProcessDefinition` |
| `with DAG(...) as dag: pass` | `with ProcessDefinition(...)
as dag: pass` | 58 | 59 | ### Operators 60 | 61 | #### Dummy Operator 62 | 63 | | Before Migration | After Migration | 64 | |--------------------------------------------------------------|---------------------------------------------------------------| 65 | | `from airflow.operators.dummy_operator import DummyOperator` | `from pydolphinscheduler.tasks.shell import Shell` | 66 | | `from airflow.operators.dummy import DummyOperator` | `from pydolphinscheduler.tasks.shell import Shell` | 67 | | `dummy = DummyOperator(...)` | `dummy = Shell(..., command="echo 'airflow dummy operator'")` | 68 | 69 | #### Shell Operator 70 | 71 | | Before Migration | After Migration | 72 | |---------------------------------------------------|----------------------------------------------------| 73 | | `from airflow.operators.bash import BashOperator` | `from pydolphinscheduler.tasks.shell import Shell` | 74 | | `bash = BashOperator(...)` | `bash = Shell(...)` | 75 | 76 | #### Spark Sql Operator 77 | 78 | | Before Migration | After Migration | 79 | |---------------------------------------------------------------------|------------------------------------------------| 80 | | `from airflow.operators.spark_sql_operator import SparkSqlOperator` | `from pydolphinscheduler.tasks.sql import Sql` | 81 | | `spark = SparkSqlOperator(...)` | `spark = Sql(...)` | 82 | 83 | #### Python Operator 84 | 85 | | Before Migration | After Migration | 86 | |----------------------------------------------------------------|------------------------------------------------------| 87 | | `from airflow.operators.python_operator import PythonOperator` | `from pydolphinscheduler.tasks.python import Python` | 88 | | `python = PythonOperator(...)` | `python = Python(...)` | 89 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Release 21 | 22 | [PyPI](https://pypi.org), Python Package Index, is a repository of software for the Python programming language. 23 | 24 | ## Build Package 25 | 26 | We use [build](https://pypi.org/project/build/) to build package, and [twine](https://pypi.org/project/twine/) to 27 | upload package to PyPi. You could first install and upgrade them by: 28 | 29 | ```bash 30 | # Install or upgrade dependencies 31 | python3 -m pip install --upgrade pip build twine 32 | 33 | # Change version 34 | VERSION= # The version of the package you want to release, e.g. 1.2.3 35 | # For macOS 36 | sed -i '' "s/__version__ = \".*\"/__version__ = \"${VERSION}\"/" src/air2phin/__init__.py 37 | # For Linux 38 | sed -i "s/__version__ = \".*\"/__version__ = \"${VERSION}\"/" src/air2phin/__init__.py 39 | 40 | git commit -am "Release v${VERSION}" 41 | 42 | # Build and sign according to the Apache requirements 43 | python setup.py clean && python3 -m build 44 | ``` 45 | 46 | It is highly recommended [releasing package to TestPyPi](#release-to-testpypi) first, to check whether the 47 | package is correct, and then [release to PyPi](#release-to-pypi). 48 | 49 | ## Release to TestPyPi 50 | 51 | TestPyPi is a test environment of PyPi, you could release to it to test whether the package is work or not. 52 | 53 | 1. Upload to TestPyPi `python3 -m twine upload --repository testpypi dist/*`. 54 | 2. 
Check the package in [TestPyPi](https://test.pypi.org/project/air2phin/) and install it
   by `python3 -m pip install --index-url https://test.pypi.org/simple/ --no-deps air2phin` to
   test whether it works or not.

## Release to PyPi

PyPi is the official repository of Python packages. It is highly recommended to [release the package to TestPyPi](#release-to-testpypi)
first to test whether the package is correct.

### Automatically

After you have checked that the package in TestPyPi is correct, you can directly tag the commit and push it to GitHub, then
GitHub Actions will automatically release to PyPi based on the tag event. You can see more detail in [pypi-workflow.yml](.github/workflows/pypi.yaml).

```shell
# Add Tag
VERSION=  # The version of the package you want to release, e.g. 1.2.3
REMOTE=   # The git remote name, we usually use `origin` or `remote`
git tag -a "${VERSION}" -m "Release v${VERSION}"
git push "${REMOTE}" --tags
```

### Manually

1. Upload to PyPi `python3 -m twine upload dist/*`.
2. Check the package in [PyPi](https://pypi.org/project/air2phin/) and install it
   by `python3 -m pip install air2phin` to verify it.

## Ref

There is an official guide to packaging projects from [PyPA](https://packaging.python.org/en/latest/tutorials/packaging-projects)

--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

--------------------------------------------------------------------------------
/docs/_static/how-it-work.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/docs/_static/how-it-work.png

--------------------------------------------------------------------------------
/docs/arch.rst:
--------------------------------------------------------------------------------
Architecture
============

Air2phin is a rule-based AST transformer, using `LibCST `_ to parse and transform Python code,
and YAML files to define the transformation rules.

The main data flow is as below:

.. image:: _static/how-it-work.png
    :width: 700
    :alt: data flow

And the steps of the transformation are (a minimal code sketch follows the list):

- Get the source content from files or stdin.
- Load all the rules from the YAML files.
- Parse the source content into a LibCST tree.
- Transform the CST tree based on the rules.
- Write the result to the original file path with :code:`-air2phin` as a stem suffix (by default; :doc:`inplace migrate ` is also supported) or to stdout.
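
For readers who have not used LibCST before, the sketch below shows the same parse, transform, and render cycle on a
single import statement. It is a minimal, hand-written illustration of the idea only, not air2phin's actual transformer
or rule engine, and the rewrite target is hard-coded here instead of being loaded from a YAML rule.

.. code-block:: python

    import libcst as cst


    class BashImportRewriter(cst.CSTTransformer):
        """Rewrite ``airflow.operators.bash`` imports, mimicking one hard-coded rule."""

        def leave_ImportFrom(self, original_node, updated_node):
            module = original_node.module
            # Only handle the plain ``from airflow.operators.bash import ...`` form.
            if module is not None and cst.Module([]).code_for_node(module) == "airflow.operators.bash":
                return updated_node.with_changes(
                    module=cst.parse_expression("pydolphinscheduler.tasks.shell"),
                    names=[cst.ImportAlias(name=cst.Name("Shell"))],
                )
            return updated_node


    source = "from airflow.operators.bash import BashOperator\n"
    tree = cst.parse_module(source)               # parse the source content into a CST
    new_tree = tree.visit(BashImportRewriter())   # transform the tree based on the rule
    print(new_tree.code)                          # emit the migrated code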
-------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | .. changelog:: 5 | :changelog-url: https://air2phin.readthedocs.io/en/latest/changelog.html 6 | :github: https://github.com/WhaleOps/air2phin/releases/ 7 | :pypi: https://pypi.org/project/air2phin/ 8 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | CLI 2 | === 3 | 4 | This section describes all command line interfaces to the Air2phin. You can see :ref:`quick start ` if 5 | you want to get started quickly. 6 | 7 | .. argparse:: 8 | :module: air2phin.cli.command 9 | :func: build_argparse 10 | :prog: air2phin -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import os 9 | import sys 10 | 11 | sys.path.insert(0, os.path.abspath(os.path.join("..", "src"))) 12 | 13 | from air2phin import __version__ # noqa 14 | 15 | project = "air2phin" 16 | copyright = "2023, Jay Chung" 17 | author = "Jay Chung" 18 | release = __version__ 19 | 20 | # -- General configuration --------------------------------------------------- 21 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 22 | 23 | extensions = [ 24 | # Measures durations of Sphinx processing 25 | "sphinx.ext.duration", 26 | "sphinx.ext.autosectionlabel", 27 | "sphinx_copybutton", 28 | # argparse 29 | "sphinxarg.ext", 30 | # changelog 31 | "sphinx_github_changelog", 32 | ] 33 | 34 | # -- Extensions configuration ------------------------------------------------- 35 | # autosectionlabel 36 | autosectionlabel_prefix_document = True 37 | 38 | templates_path = ["_templates"] 39 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 40 | 41 | # -- Options for HTML output ------------------------------------------------- 42 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 43 | 44 | html_theme = "sphinx_rtd_theme" 45 | html_static_path = ["_static"] 46 | -------------------------------------------------------------------------------- /docs/fake/hooks.rst: -------------------------------------------------------------------------------- 1 | fake.hooks 2 | ========== 3 | 4 | Some users will use hook in Python operator or custom operator, for example, we have a custom Python task like: 5 | 6 | .. 
code-block:: python 7 | 8 | from contextlib import closing 9 | from airflow.operators.python import PythonOperator 10 | from airflow.providers.postgres.hooks.postgres import PostgresHook 11 | 12 | def demo(): 13 | connection = PostgresHook.get_connection("postgres_default") 14 | hook = PostgresHook(connection=connection) 15 | with closing(hook.get_conn()) as conn: 16 | with closing(conn.cursor()) as cursor: 17 | cursor.execute("SELECT 1") 18 | print(cursor.fetchall()) 19 | 20 | demo = PythonOperator( 21 | task_id="demo", 22 | python_callable=test, 23 | ) 24 | 25 | when you use air2phin to migrate it to dolphinscheduler python SDK, the result will be: 26 | 27 | .. code-block:: python 28 | 29 | from contextlib import closing 30 | from pydolphinscheduler.tasks.python import Python 31 | from airflow.providers.postgres.hooks.postgres import PostgresHook 32 | 33 | def demo(): 34 | connection = PostgresHook.get_connection("postgres_default") 35 | hook = PostgresHook(connection=connection) 36 | with closing(hook.get_conn()) as conn: 37 | with closing(conn.cursor()) as cursor: 38 | cursor.execute("SELECT 1") 39 | print(cursor.fetchall()) 40 | 41 | demo = Python( 42 | name="demo", 43 | definition=test, 44 | ) 45 | 46 | As you can see, the task ``demo``'s class name and its attributes is migrated to dolphinscheduler's, but the function 47 | ``demo`` is not migrated, because it is a function and not an airflow task. 48 | 49 | We can find it used :code:`airflow.PostgresHook` to connect to airflow's metadata database, and then execute SQL. 50 | This code cannot be run in dolphinscheduler python SDK, because :code:`airflow.PostgresHook` is a concept of airflow 51 | only. There are two ways if you want it successfully run by dolphinscheduler python SDK, one is to rewrite the function 52 | to make it work with dolphinscheduler, another is to use :code:`air2phin.fake` without any modification. 53 | 54 | Usage 55 | ----- 56 | 57 | Basic Usage 58 | ~~~~~~~~~~~ 59 | 60 | When you run :code:`air2phin migrate` subcommand, it will automatically detect if there are any :code:`airflow.hooks` 61 | in your DAG. If so, it will automatically migrate the hook module into module :code:`air2phin.fake`, which means 62 | you can do nothing for hook migration. 63 | 64 | .. note:: 65 | 66 | Module :code:`air2phin.fake` only support two hooks migration, which are :code:`airflow.PostgresHook` 67 | and :code:`airflow.MySqlHook`. If you want to migrate other hooks, you can use :doc:`custom rules <../howto/custom-rules>` 68 | 69 | With :code:`air2phin.fake` module, the original DAG can be migrated to: 70 | 71 | .. code-block:: python 72 | 73 | from contextlib import closing 74 | from pydolphinscheduler.tasks.python import Python 75 | from air2phin.fake.hooks.postgres import PostgresHook 76 | 77 | def demo(): 78 | connection = PostgresHook.get_connection("postgres_default") 79 | hook = PostgresHook(connection=connection) 80 | with closing(hook.get_conn()) as conn: 81 | with closing(conn.cursor()) as cursor: 82 | cursor.execute("SELECT 1") 83 | print(cursor.fetchall()) 84 | 85 | demo = Python( 86 | name="demo", 87 | definition=test, 88 | ) 89 | 90 | And you can see air2phin migrate the hook module from :code:`airflow.providers.postgres.hooks.postgres.PostgresHook` to 91 | :code:`air2phin.fake.hooks.postgres.PostgresHook`. 
When you run the code in dolphinscheduler, :code:`air2phin.fake` will query the dolphinscheduler metadata database to
get the connection information, so you can use it just like you use
:code:`airflow.providers.postgres.hooks.postgres.PostgresHook`.

Requirement
^^^^^^^^^^^

- The dolphinscheduler workers must be able to connect to the dolphinscheduler metadata database, because
  :code:`air2phin.fake` will query the connection information from the dolphinscheduler metadata database.
- The data source named ``postgres_default`` (same as airflow's connection) must exist in the dolphinscheduler metadata
  database for air2phin.fake to get the connection information.
- A way to connect to the dolphinscheduler metadata database; any one of the following is acceptable:

  - Package `pydolphinscheduler `_ is installed in the
    dolphinscheduler worker's Python environment, and the
    `token is correct `_.

  - An environment variable named ``AIR2PHIN_FAKE_CONNECTION`` is set with the connection information of the
    dolphinscheduler metadata database. It uses the
    `sqlalchemy connection string format `_,
    for example: :code:`postgresql+psycopg2://scott:tiger@localhost:5432/mydatabase`. We recommend you use
    dolphinscheduler's `Environmental Management `_
    to do that; all you should do is add a new environment with content like the export below (a quick way to verify
    the value is sketched after this list)

    .. code-block:: bash

        export AIR2PHIN_FAKE_CONNECTION=postgresql+psycopg2://scott:tiger@localhost:5432/mydatabase

    and use it in your dolphinscheduler's Python task.
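
If you go with the environment variable approach, you can check the value from any worker before wiring it into tasks.
The snippet below is only a hedged illustration: it assumes ``AIR2PHIN_FAKE_CONNECTION`` is exported in the current
environment and that SQLAlchemy plus the matching database driver are installed; it does not show how
:code:`air2phin.fake` itself performs the datasource lookup.

.. code-block:: python

    import os

    from sqlalchemy import create_engine, text

    # Assumed: AIR2PHIN_FAKE_CONNECTION holds a SQLAlchemy URL such as
    # postgresql+psycopg2://scott:tiger@localhost:5432/mydatabase
    engine = create_engine(os.environ["AIR2PHIN_FAKE_CONNECTION"])

    with engine.connect() as conn:
        # A trivial round trip proves the metadata database is reachable.
        print(conn.execute(text("SELECT 1")).scalar())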
.. note::

    The priority of the package ``pydolphinscheduler`` is higher than the environment variable
    ``AIR2PHIN_FAKE_CONNECTION``. If you want fewer connections to your dolphinscheduler metadata database,
    please use the package pydolphinscheduler, which will reuse the connection pool of dolphinscheduler itself.
    But if you do not care much about the number of connections (for example, you do not have many tasks using
    air2phin.fake), or do not want to install pydolphinscheduler on the dolphinscheduler worker, the environment
    variable ``AIR2PHIN_FAKE_CONNECTION`` is a better choice.

With Non-unique Datasource Name
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Dolphinscheduler datasources use the joint unique index :code:`(type, name)`, so a datasource name is only guaranteed
to be unique within one type, whereas an airflow connection id is unique on its own. So when your dolphinscheduler
metadata database has two datasources with the same name, air2phin.fake will raise an error; in this case, you should
add the type of the datasource

.. code-block:: python

    # When you have two datasources named "postgres_default" in the dolphinscheduler metadata database
    from air2phin.fake.hooks.postgres import PostgresHook
    connection = PostgresHook.get_connection("postgres_default")

    # You should add the type of the datasource, in the format "type.name"
    from air2phin.fake.hooks.postgres import PostgresHook
    connection = PostgresHook.get_connection("postgres.postgres_default")

or you can change your datasource name to make it unique.

.. code-block:: python

    # Change the datasource name to make it unique, for example, change from "postgres_default" to "postgres_default_uniq"
    from air2phin.fake.hooks.postgres import PostgresHook
    connection = PostgresHook.get_connection("postgres_default_uniq")

And dolphinscheduler only supports the following types of datasource, which means your type must be one of them:

- mysql
- postgresql
- hive
- spark
- clickhouse
- oracle
- sqlserver
- db2
- presto
- h2
- redshift
- dameng
- starrocks

--------------------------------------------------------------------------------
/docs/fake/index.rst:
--------------------------------------------------------------------------------
Fake Module
===========

The fake module is an advanced usage of air2phin, which makes it possible for airflow DAGs that contain
:code:`airflow.hooks` or other airflow-only concepts to be migrated and run in dolphinscheduler without modifying any
hook code.

.. note::

    1. The dolphinscheduler worker must be able to connect to the dolphinscheduler metadata database when
       you want to execute the migrated DAG with :code:`air2phin.fake`.

    2. The air2phin fake module is an optional dependency, which means you can install it via pip with :code:`air2phin[fake]`.

    .. code-block:: bash

        python -m pip install --upgrade air2phin[fake]

.. toctree::
    :maxdepth: 2

    hooks
    models

--------------------------------------------------------------------------------
/docs/fake/models.rst:
--------------------------------------------------------------------------------
fake.models
===========

Fake implementations for :code:`airflow.models` objects that are normally fetched from the database.

variable
--------

A fake for :code:`airflow.models.Variable` that returns what we pass to it.

.. code-block:: python

    from air2phin.fake.models.variable import Variable

    var = Variable.get("var")
    print(var)
    # var

--------------------------------------------------------------------------------
/docs/howto/custom-rules.rst:
--------------------------------------------------------------------------------
Custom Rules
============

Create Custom Rule
------------------

Sometimes, you need to add some custom rules to your migration. For example, you have some custom airflow operators
based on the existing ``PostgresOperator`` and you want to migrate them to dolphinscheduler. The custom operator
definition is like this:

..
code-block:: python 12 | 13 | # Just create a custom operator base on PostgresOperator, and do nothing except change the connection 14 | # arguments name from ``postgres_conn_id`` to ``my_custom_conn_id`` 15 | from airflow.providers.postgres.operators.postgres import PostgresOperator 16 | 17 | class MyCustomOperator(PostgresOperator): 18 | def __init__( 19 | self, 20 | *, 21 | sql: str | Iterable[str], 22 | my_custom_conn_id: str = 'postgres_default', 23 | autocommit: bool = False, 24 | parameters: Iterable | Mapping | None = None, 25 | database: str | None = None, 26 | runtime_parameters: Mapping | None = None, 27 | **kwargs, 28 | ) -> None: 29 | super().__init__( 30 | sql=sql, 31 | postgres_conn_id=my_custom_conn_id, 32 | autocommit=autocommit, 33 | parameters=parameters, 34 | database=database, 35 | runtime_parameters=runtime_parameters, 36 | **kwargs, 37 | ) 38 | 39 | You put this operator same directory as your DAG file. and your airflow dags files are like this: 40 | 41 | .. code-block:: text 42 | 43 | dags 44 | └── dag.py 45 | custom 46 | └── my_custom_operator.py 47 | 48 | And in the file ``dag.py``, you use this custom operator like this: 49 | 50 | .. code-block:: python 51 | 52 | from custom.my_custom_operator import MyCustomOperator 53 | 54 | with DAG( 55 | dag_id='my_custom_dag', 56 | default_args=default_args, 57 | schedule_interval='@once', 58 | start_date=days_ago(2), 59 | tags=['example'], 60 | ) as dag: 61 | t1 = MyCustomOperator( 62 | task_id='my_custom_task', 63 | sql='select * from table', 64 | my_custom_conn_id='my_custom_conn_id', 65 | ) 66 | 67 | 68 | In this case, you can add a custom rule file named ``MyCustomOperator.yaml`` to tell air2phin what you want to do 69 | during the migration 70 | 71 | .. code-block:: yaml 72 | 73 | name: MyCustomOperator 74 | description: The configuration for migrating airflow custom operator MyCustomOperator to DolphinScheduler SQL task. 75 | 76 | migration: 77 | module: 78 | - action: replace 79 | src: custom.my_custom_operator.MyCustomOperator 80 | dest: pydolphinscheduler.tasks.sql.Sql 81 | parameter: 82 | - action: replace 83 | src: task_id 84 | dest: name 85 | - action: replace 86 | src: my_custom_conn_id 87 | dest: datasource_name 88 | 89 | Use Custom Rule 90 | --------------- 91 | 92 | Save the YAML config file to any directory you want, and declare the path when you run the ``air2phin`` command: 93 | 94 | .. code-block:: bash 95 | 96 | air2phin migrate --custom-rules /path/to/MyCustomOperator.yaml ~/airflow/dags/dag.py 97 | 98 | And you can see the new DAG file directory ``~/airflow/dags`` named ``dag-air2phin.py`` is created which is the 99 | migrated result of ``dag.py``. 100 | 101 | Use Multiple Custom Rules 102 | ------------------------- 103 | 104 | Air2phin also supports using multiple custom rules in a single migration, and has directory and scatter files due 105 | to different files organized. 106 | 107 | In Single File and Directory 108 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 109 | 110 | When all custom rules are in one single file or directory, use single options argument :code:`--custom-rules` or :code:`-r` 111 | can use them 112 | 113 | .. 
code-block:: bash 114 | 115 | # single file 116 | air2phin migrate --custom-rules /path/to/MyCustomOperator.yaml ~/airflow/dags/dag.py 117 | 118 | # single directory 119 | air2phin migrate --custom-rules /path/to/rules/dir ~/airflow/dags/dag.py 120 | 121 | In Scatter Files or Directories 122 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 123 | 124 | Sometimes, our rules will be in the different places, and air2phin support the use option argument :code:`--custom-rules` or :code:`-r` 125 | multiple times in one single migration 126 | 127 | .. code-block:: bash 128 | 129 | # multiple files 130 | air2phin migrate --custom-rules /path/to/MyCustomOperator1.yaml --custom-rules /path/to/MyCustomOperator2.yaml ~/airflow/dags/dag.py 131 | 132 | # multiple directories 133 | air2phin migrate --custom-rules /path/to/rules/dir1 --custom-rules /path/to/rules/dir2 ~/airflow/dags/dag.py 134 | 135 | # multiple mixed files and directories 136 | air2phin migrate --custom-rules /path/to/MyCustomOperator1.yaml --custom-rules /path/to/rules/dir1 ~/airflow/dags/dag.py 137 | 138 | Use Custom Rule Only Without Built-ins 139 | --------------------------------------- 140 | 141 | All the above examples using custom rules combine built-in rules and customs, sometimes we just want to apply 142 | the custom rule to migrate existing files, just like we apply a patch to our codebase. We can use option argument 143 | :code:`--custom-only` or :code:`-R` to use custom rules and ignore built-in. 144 | 145 | .. code-block:: bash 146 | 147 | # Only use custom rules and ignore built-in one 148 | air2phin migrate --custom-rules /path/to/MyCustomOperator1.yaml --custom-only ~/airflow/dags/dag.py 149 | 150 | It is useful when you have lots of files to migrate, if you found some code should change again after the first 151 | migration run, but do not want to apply all the rules which cost lots of time, you can try to use this feature. 152 | 153 | Use Rule Override 154 | ----------------- 155 | 156 | Custom rules provide the ability to override built-in rules. Sometimes we want to override the built-in migrate 157 | rules by custom one, we can use the same name as the built-in rule when you specify the custom rule. 158 | 159 | For example, we have the build-in rule ``PythonOperator.yaml``, and the content as below: 160 | 161 | .. code-block:: yaml 162 | 163 | name: PythonOperator 164 | description: The configuration for migrating Airflow PythonOperator to DolphinScheduler Python task. 165 | 166 | migration: 167 | module: 168 | - action: replace 169 | src: 170 | - airflow.operators.python_operator.PythonOperator 171 | - airflow.operators.python.PythonOperator 172 | dest: pydolphinscheduler.tasks.python.Python 173 | parameter: 174 | - action: replace 175 | src: task_id 176 | dest: name 177 | - action: replace 178 | src: python_callable 179 | dest: definition 180 | 181 | If you want to run those python task base on the dolphinscheduler specific environment, the best practice is to use rule 182 | override. Create a custom rule with the name ``CustomPythonOperator.yaml`` with content 183 | 184 | .. code-block:: yaml 185 | 186 | name: PythonOperator 187 | description: The configuration for migrating Airflow PythonOperator to DolphinScheduler Python task. 
159 | For example, we have the built-in rule ``PythonOperator.yaml``, and its content is as below:
160 | 
161 | .. code-block:: yaml
162 | 
163 |     name: PythonOperator
164 |     description: The configuration for migrating Airflow PythonOperator to DolphinScheduler Python task.
165 | 
166 |     migration:
167 |       module:
168 |         - action: replace
169 |           src:
170 |             - airflow.operators.python_operator.PythonOperator
171 |             - airflow.operators.python.PythonOperator
172 |           dest: pydolphinscheduler.tasks.python.Python
173 |       parameter:
174 |         - action: replace
175 |           src: task_id
176 |           dest: name
177 |         - action: replace
178 |           src: python_callable
179 |           dest: definition
180 | 
181 | If you want to run those Python tasks in a specific DolphinScheduler environment, the best practice is to use rule
182 | override. Create a custom rule named ``CustomPythonOperator.yaml`` with the content
183 | 
184 | .. code-block:: yaml
185 | 
186 |     name: PythonOperator
187 |     description: The configuration for migrating Airflow PythonOperator to DolphinScheduler Python task.
188 | 
189 |     migration:
190 |       module:
191 |         - action: replace
192 |           src:
193 |             - airflow.operators.python_operator.PythonOperator
194 |             - airflow.operators.python.PythonOperator
195 |           dest: pydolphinscheduler.tasks.python.Python
196 |       parameter:
197 |         - action: replace
198 |           src: task_id
199 |           dest: name
200 |         - action: replace
201 |           src: python_callable
202 |           dest: definition
203 |         - action: add
204 |           arg: environment_name
205 |           default:
206 |             type: str
207 |             value: airflow_migrate
208 | 
209 | We do nothing but add five new lines (note that the ``name`` attribute in ``CustomPythonOperator.yaml`` is the
210 | same as the value of the built-in one in ``PythonOperator.yaml``)
211 | 
212 | .. code-block:: yaml
213 | 
214 |     - action: add
215 |       arg: environment_name
216 |       default:
217 |         type: str
218 |         value: airflow_migrate
219 | 
220 | in ``CustomPythonOperator.yaml`` to tell air2phin to add one new argument named ``environment_name`` with the default
221 | value ``airflow_migrate``, then we can use it with the command
222 | 
223 | .. code-block:: bash
224 | 
225 |     air2phin migrate --custom-rules CustomPythonOperator.yaml ~/airflow/dags/dag.py
226 | 
227 | ``PythonOperator.yaml`` will be overridden by ``CustomPythonOperator.yaml`` because ``CustomPythonOperator.yaml``
228 | has the same name and ``CustomPythonOperator.yaml`` is the custom rule.
229 | 
230 | .. note::
231 | 
232 |     We use the ``name`` attribute in the file content, instead of the filename, for identification.
233 | 
-------------------------------------------------------------------------------- /docs/howto/filter.rst: --------------------------------------------------------------------------------
 1 | Filter Files with Pattern
 2 | =========================
 3 | 
 4 | Air2phin will only migrate files that pass its filter rule; the default is :code:`*.py` (include all Python files in the given path).
 5 | 
 6 | Custom Include Pattern
 7 | ----------------------
 8 | 
 9 | The include behavior of a migration can be overwritten by the option argument :code:`--include` or :code:`-I`, for example
10 | if you want to migrate all Python files with the prefix ``airflow``, you can use a single command
11 | 
12 | .. code-block:: bash
13 | 
14 |     air2phin migrate --include 'airflow*.py' /PATH/TO/DIRECTORY
15 | 
16 | Custom Exclude Pattern
17 | ----------------------
18 | 
19 | To migrate all files except some files or directories, use the option argument :code:`--exclude` or :code:`-E`, for example
20 | if you want to exclude all Python files in the package ``utils``, you can use the command
21 | 
22 | .. code-block:: bash
23 | 
24 |     air2phin migrate --exclude 'utils/*.py' /PATH/TO/DIRECTORY
25 | 
26 | .. note::
27 | 
28 |     Both the include and exclude option arguments respect the `Path.rglob `_
29 |     rule. If you want to include all Python files matching ``dag-*.py`` in the directory ``~/airflow/dags`` except the ``utils`` directory, you can
30 |     use :code:`air2phin migrate --include 'dag-*.py' --exclude 'utils/*' ~/airflow/dags`
31 | 
32 | For more detail please see :doc:`../cli`.
33 | 
-------------------------------------------------------------------------------- /docs/howto/index.rst: --------------------------------------------------------------------------------
 1 | HOWTOS
 2 | ======
 3 | 
 4 | Useful guides for using air2phin.
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 2
 8 | 
 9 |    custom-rules
10 |    migrate-inplace
11 |    filter
-------------------------------------------------------------------------------- /docs/howto/migrate-inplace.rst: --------------------------------------------------------------------------------
 1 | Migration Files Inplace
 2 | =======================
 3 | 
 4 | Air2phin will create a new file with the addition of ``-air2phin`` as a stem suffix when users run the migrate command
 5 | :code:`air2phin migrate /PATH/TO/FILE`, which means a new file named ``/PATH/TO/FILE-air2phin`` will be created and
 6 | all migrated contents are in ``/PATH/TO/FILE-air2phin``.
 7 | 
 8 | If you want to replace the existing file with the migrated content directly, instead of creating a new file and keeping
 9 | both, you can use the option argument :code:`--inplace` or :code:`-i` for the :code:`air2phin migrate` subcommand. Options
10 | :code:`--inplace` and :code:`-i` work for multiple files and directories; for more detail please see :doc:`../cli`.
11 | 
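For example, to migrate a single DAG file and overwrite it in place (the DAG path below is only a placeholder):

.. code-block:: bash

    # Overwrite the source file with the migrated content instead of writing a new ``*-air2phin.py`` file
    air2phin migrate --inplace ~/airflow/dags/dag.py
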
12 | .. note::
13 | 
14 |     Please make sure you have already backed up all of your source files, or make sure they are under a version control system,
15 |     when you run the migrate command with the option argument :code:`--inplace`, otherwise your source content will be lost.
16 | 
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
 1 | .. air2phin documentation master file, created by
 2 |    sphinx-quickstart on Sat Dec 3 23:11:01 2022.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Air2phin's Documentation
 7 | ========================
 8 | 
 9 | Air2phin is a tool for migrating Airflow DAGs to DolphinScheduler Python API.
10 | 
11 | It is a rule-based AST transformer, using `LibCST `_ to parse and transform Python code,
12 | and YAML files to define the transformation rules.
13 | 
14 | 
15 | .. toctree::
16 |    :maxdepth: 2
17 | 
18 |    start
19 |    cli
20 |    fake/index
21 |    howto/index
22 |    arch
23 |    changelog
24 | 
25 | Indices and tables
26 | ==================
27 | 
28 | * :ref:`genindex`
29 | * :ref:`modindex`
30 | * :ref:`search`
31 | 
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 
-------------------------------------------------------------------------------- /docs/start.rst: --------------------------------------------------------------------------------
 1 | Getting Started
 2 | ===============
 3 | 
 4 | Install
 5 | -------
 6 | 
 7 | Air2phin releases are available on PyPI. The easiest way to install Air2phin is via pip, and we highly recommend using
 8 | the latest version:
 9 | 
10 | .. code-block:: bash
11 | 
12 |     python -m pip install --upgrade air2phin
13 | 
14 | Usage
15 | -----
16 | 
17 | The easiest way to use Air2phin is via the `cli`_, and you can also use the API if you prefer `programming`_.
18 | 
19 | CLI
20 | ~~~
21 | 
22 | Standard input
23 | ^^^^^^^^^^^^^^
24 | 
25 | The CLI is the easiest way to use Air2phin; we will take a quick look at its basic usage.
26 | 
27 | .. code-block:: bash
28 | 
29 |     # Show all arguments or subcommands of air2phin
30 |     air2phin --help
31 | 
32 |     # A simple migration example based on stdin, which will show the migrated result on stdout
33 |     # Add the option argument `--diff` to see the diff detail of this migration
34 |     air2phin test "from airflow.operators.bash import BashOperator
35 | 
36 |     test = BashOperator(
37 |         task_id='test',
38 |         bash_command='echo 1',
39 |     )
40 |     "
41 | 
42 | Single File
43 | ^^^^^^^^^^^
44 | 
45 | After running the above command, you will get the migrated result shown in stdout. The most common usage is to migrate
46 | an existing Airflow DAG file, which can be done by the :code:`air2phin migrate` command. We have an out-of-the-box example:
47 | 
48 | .. code-block:: bash
49 | 
50 |     # Get an example Airflow DAG file via terminal
51 |     wget https://raw.githubusercontent.com/WhaleOps/air2phin/main/examples/airflow/tutorial.py
52 | 
53 |     # Run migrate command
54 |     air2phin migrate tutorial.py
55 | 
56 | And the migrated result will be in the same directory with the stem suffix :code:`-air2phin` (by default, it also supports
57 | :doc:`inplace migrate `); in this case, it will be `tutorial-air2phin.py` in the current directory.
58 | 
59 | Multiple Files or Directories
60 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
61 | 
62 | air2phin can not only migrate one single file, but also works for multiple files and even a whole directory of DAG files,
63 | 
64 | .. code-block:: bash
65 | 
66 |     # Migrate multiple files
67 |     air2phin migrate /PATH/TO/FILE1.py /PATH/TO/FILE2.py
68 | 
69 |     # Migrate all *.py files in the current directory
70 |     air2phin migrate
71 | 
72 |     # Migrate all *.py files in the Airflow DAG directory
73 |     air2phin migrate ~/airflow/dags
74 | 
75 | Same as `single file`_, the migrated result is in the same directory as the source file, marked with the stem suffix :code:`-air2phin`
76 | (by default, it also supports :doc:`inplace migrate `).
77 | 
78 | If you want to deep dive into the CLI, please check the :doc:`cli` section.
79 | 
80 | Programming
81 | ~~~~~~~~~~~
82 | 
83 | Air2phin also provides an API to use in your program; all you need to do is import the :code:`air2phin.runner` module and
84 | call :code:`with_str` or :code:`with_file` based on your input type.
85 | 
86 | For String
87 | ^^^^^^^^^^
88 | 
89 | :code:`with_str` will handle and migrate the input string, and return the migrated string.
90 | 
91 | .. 
code-block:: python 92 | 93 | from air2phin import runner 94 | 95 | code = """from airflow.operators.bash import BashOperator 96 | 97 | test = BashOperator( 98 | task_id='test', 99 | bash_command='echo 1', 100 | ) 101 | """ 102 | 103 | migrated = runner.with_str(code) 104 | print(migrated) 105 | 106 | For File 107 | ^^^^^^^^ 108 | 109 | :code:`with_file` will handle and migrate the input file, and write the migrated result with addition :code:`-ariphin` as a stem suffix 110 | to the same directory as the input file. 111 | 112 | .. code-block:: python 113 | 114 | from air2phin import runner 115 | 116 | path = "~/airflow/dags/tutorial.py" 117 | 118 | migrated = runner.with_file(path) 119 | print(migrated) 120 | 121 | 122 | What's Next 123 | ----------- 124 | 125 | - :doc:`cli` if you want to deep dive into CLI usage 126 | - :doc:`arch` if you want to know Air2phin's architecture 127 | -------------------------------------------------------------------------------- /examples/airflow/bash.py: -------------------------------------------------------------------------------- 1 | # from datetime import datetime, timedelta 2 | 3 | 4 | # import airflow.DAG as DAG 5 | from airflow import DAG 6 | from airflow.operators.bash import BashOperator 7 | from airflow.operators.dummy_operator import DummyOperator 8 | from ariflow import * 9 | 10 | with DAG( 11 | "tutorial", 12 | schedule_interval=timedelta(days=1), 13 | start_date=datetime(2021, 1, 1), 14 | ) as dag: 15 | run_this = BashOperator( 16 | task_id="print_date", 17 | bash_command="date", 18 | ) 19 | 20 | dummy = DummyOperator( 21 | task_id="dummy", 22 | ) 23 | -------------------------------------------------------------------------------- /examples/airflow/python.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | from airflow import DAG 4 | from airflow.operators.python_operator import PythonOperator 5 | 6 | 7 | def print_context(ds, **kwargs): 8 | print(ds) 9 | 10 | 11 | with DAG( 12 | "tutorial", 13 | schedule_interval=timedelta(days=1), 14 | start_date=datetime(2021, 1, 1), 15 | ) as dag: 16 | run_this = PythonOperator( 17 | task_id="print_the_context", 18 | python_callable=print_context, 19 | ) 20 | -------------------------------------------------------------------------------- /examples/airflow/tutorial.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | 19 | """ 20 | ### Tutorial Documentation 21 | Documentation that goes along with the Airflow tutorial located 22 | [here](https://airflow.apache.org/tutorial.html) 23 | """ 24 | # [START tutorial] 25 | # [START import_module] 26 | from datetime import datetime, timedelta 27 | from textwrap import dedent 28 | 29 | # The DAG object; we'll need this to instantiate a DAG 30 | from airflow import DAG 31 | 32 | # Operators; we need this to operate! 33 | from airflow.operators.bash import BashOperator 34 | 35 | # [END import_module] 36 | 37 | # [START default_args] 38 | # These args will get passed on to each operator 39 | # You can override them on a per-task basis during operator initialization 40 | default_args = { 41 | "owner": "airflow", 42 | "depends_on_past": False, 43 | "email": ["airflow@examples.com"], 44 | "email_on_failure": False, 45 | "email_on_retry": False, 46 | "retries": 1, 47 | "retry_delay": timedelta(minutes=5), 48 | # 'queue': 'bash_queue', 49 | # 'pool': 'backfill', 50 | # 'priority_weight': 10, 51 | # 'end_date': datetime(2016, 1, 1), 52 | # 'wait_for_downstream': False, 53 | # 'dag': dag, 54 | # 'sla': timedelta(hours=2), 55 | # 'execution_timeout': timedelta(seconds=300), 56 | # 'on_failure_callback': some_function, 57 | # 'on_success_callback': some_other_function, 58 | # 'on_retry_callback': another_function, 59 | # 'sla_miss_callback': yet_another_function, 60 | # 'trigger_rule': 'all_success' 61 | } 62 | # [END default_args] 63 | 64 | # [START instantiate_dag] 65 | with DAG( 66 | "tutorial", 67 | default_args=default_args, 68 | description="A simple tutorial DAG", 69 | schedule_interval=timedelta(days=1), 70 | start_date=datetime(2021, 1, 1), 71 | catchup=False, 72 | tags=["examples"], 73 | ) as dag: 74 | # [END instantiate_dag] 75 | 76 | # t1, t2 and t3 are examples of tasks created by instantiating operators 77 | # [START basic_task] 78 | t1 = BashOperator( 79 | task_id="print_date", 80 | bash_command="date", 81 | ) 82 | 83 | t2 = BashOperator( 84 | task_id="sleep", 85 | depends_on_past=False, 86 | bash_command="sleep 5", 87 | retries=3, 88 | ) 89 | # [END basic_task] 90 | 91 | # [START documentation] 92 | t1.doc_md = dedent( 93 | """\ 94 | #### Task Documentation 95 | You can document your task using the attributes `doc_md` (markdown), 96 | `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets 97 | rendered in the UI's Task Instance Details page. 
98 | ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) 99 | """ 100 | ) 101 | 102 | dag.doc_md = ( 103 | __doc__ # providing that you have a docstring at the beginning of the DAG 104 | ) 105 | dag.doc_md = """ 106 | This is a documentation placed anywhere 107 | """ # otherwise, type it like this 108 | # [END documentation] 109 | 110 | # [START jinja_template] 111 | templated_command = dedent( 112 | """ 113 | {% for i in range(5) %} 114 | echo "{{ ds }}" 115 | echo "{{ macros.ds_add(ds, 7)}}" 116 | echo "{{ params.my_param }}" 117 | {% endfor %} 118 | """ 119 | ) 120 | 121 | t3 = BashOperator( 122 | task_id="templated", 123 | depends_on_past=False, 124 | bash_command=templated_command, 125 | params={"my_param": "Parameter I passed in"}, 126 | ) 127 | # [END jinja_template] 128 | 129 | t1 >> [t2, t3] 130 | # [END tutorial] 131 | -------------------------------------------------------------------------------- /examples/libcst/metadata.py: -------------------------------------------------------------------------------- 1 | """source code copy-paste from https://libcst.readthedocs.io/en/latest/metadata_tutorial.html""" 2 | from typing import Optional 3 | 4 | import libcst as cst 5 | from libcst.metadata import PositionProvider, QualifiedNameProvider 6 | 7 | 8 | class IsParamProvider(cst.BatchableMetadataProvider[bool]): 9 | """ 10 | Marks Name nodes found as a parameter to a function. 11 | """ 12 | 13 | def __init__(self) -> None: 14 | super().__init__() 15 | self.is_param = False 16 | 17 | def visit_Param(self, node: cst.Param) -> None: 18 | # Mark the child Name node as a parameter 19 | self.set_metadata(node.name, True) 20 | 21 | def visit_Name(self, node: cst.Name) -> None: 22 | # Mark all other Name nodes as not parameters 23 | if not self.get_metadata(type(self), node, False): 24 | self.set_metadata(node, False) 25 | 26 | 27 | # module = cst.parse_module("x") 28 | # wrapper = cst.MetadataWrapper(module) 29 | # 30 | # isparam = wrapper.resolve(IsParamProvider) 31 | # x_name_node = wrapper.module.body[0].body[0].value 32 | 33 | # print(isparam[x_name_node]) 34 | 35 | 36 | class ParamPrinter(cst.CSTVisitor): 37 | METADATA_DEPENDENCIES = ( 38 | IsParamProvider, 39 | PositionProvider, 40 | QualifiedNameProvider, 41 | ) 42 | 43 | def visit_Call(self, node: cst.Call) -> Optional[bool]: 44 | # Only print out names that are parameters 45 | if self.get_metadata(QualifiedNameProvider, node): 46 | pos = self.get_metadata(PositionProvider, node).start 47 | print(f"found at line {pos.line}, column {pos.column}") 48 | 49 | return super().visit_Call(node) 50 | 51 | def visit_Name(self, node: cst.Name) -> None: 52 | # Only print out names that are parameters 53 | if self.get_metadata(IsParamProvider, node): 54 | pos = self.get_metadata(PositionProvider, node).start 55 | print(f"{node.value} found at line {pos.line}, column {pos.column}") 56 | 57 | 58 | module = cst.parse_module( 59 | "from datetime import datetime\n\ndef foo(x):\n y = 1\n now = datetime.now()\n return x + y" 60 | ) 61 | wrapper = cst.MetadataWrapper(module) 62 | result = wrapper.visit(ParamPrinter()) # NB: wrapper.visit not module.visit 63 | -------------------------------------------------------------------------------- /examples/libcst/visit_transform.py: -------------------------------------------------------------------------------- 1 | """source code copy-paste from https://libcst.readthedocs.io/en/latest/tutorial.html""" 2 | 3 | import difflib 4 | from typing import Dict, List, Optional, Tuple 5 | 6 | 
import libcst as cst 7 | 8 | py_source = ''' 9 | class PythonToken(Token): 10 | def __repr__(self): 11 | return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % 12 | self._replace(type=self.type.name)) 13 | 14 | def tokenize(code, version_info, start_pos=(1, 0)): 15 | """Generate tokens from a the source code (string).""" 16 | lines = split_lines(code, keepends=True) 17 | return tokenize_lines(lines, version_info, start_pos=start_pos) 18 | ''' 19 | 20 | pyi_source = """ 21 | class PythonToken(Token): 22 | def __repr__(self) -> str: ... 23 | 24 | def tokenize( 25 | code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0) 26 | ) -> Generator[PythonToken, None, None]: ... 27 | """ 28 | 29 | 30 | class TypingCollector(cst.CSTVisitor): 31 | def __init__(self): 32 | # stack for storing the canonical name of the current function 33 | super().__init__() 34 | self.stack: List[Tuple[str, ...]] = [] 35 | # store the annotations 36 | self.annotations: Dict[ 37 | Tuple[str, ...], # key: tuple of canonical class/function name 38 | Tuple[cst.Parameters, Optional[cst.Annotation]], # value: (params, returns) 39 | ] = {} 40 | 41 | def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]: 42 | self.stack.append(node.name.value) 43 | 44 | def leave_ClassDef(self, node: cst.ClassDef) -> None: 45 | self.stack.pop() 46 | 47 | def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]: 48 | self.stack.append(node.name.value) 49 | self.annotations[tuple(self.stack)] = (node.params, node.returns) 50 | return False # pyi files don't support inner functions, return False to stop the traversal. 51 | 52 | def leave_FunctionDef(self, node: cst.FunctionDef) -> None: 53 | self.stack.pop() 54 | 55 | 56 | class TypingTransformer(cst.CSTTransformer): 57 | def __init__(self, annotations): 58 | # stack for storing the canonical name of the current function 59 | super().__init__() 60 | self.stack: List[Tuple[str, ...]] = [] 61 | # store the annotations 62 | self.annotations: Dict[ 63 | Tuple[str, ...], # key: tuple of canonical class/function name 64 | Tuple[cst.Parameters, Optional[cst.Annotation]], # value: (params, returns) 65 | ] = annotations 66 | 67 | def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]: 68 | self.stack.append(node.name.value) 69 | 70 | def leave_ClassDef( 71 | self, original_node: cst.ClassDef, updated_node: cst.ClassDef 72 | ) -> cst.CSTNode: 73 | self.stack.pop() 74 | return updated_node 75 | 76 | def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]: 77 | self.stack.append(node.name.value) 78 | return False # pyi files don't support inner functions, return False to stop the traversal. 
79 | 80 | def leave_FunctionDef( 81 | self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef 82 | ) -> cst.CSTNode: 83 | key = tuple(self.stack) 84 | self.stack.pop() 85 | if key in self.annotations: 86 | annotations = self.annotations[key] 87 | return updated_node.with_changes( 88 | params=annotations[0], returns=annotations[1] 89 | ) 90 | return updated_node 91 | 92 | 93 | source_tree = cst.parse_module(py_source) 94 | stub_tree = cst.parse_module(pyi_source) 95 | 96 | visitor = TypingCollector() 97 | stub_tree.visit(visitor) 98 | transformer = TypingTransformer(visitor.annotations) 99 | modified_tree = source_tree.visit(transformer) 100 | 101 | print(modified_tree.code) 102 | print( 103 | "".join( 104 | difflib.unified_diff( 105 | py_source.splitlines(True), modified_tree.code.splitlines(True) 106 | ) 107 | ) 108 | ) 109 | -------------------------------------------------------------------------------- /examples/transfer_tutorial.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | 19 | from air2phin import runner 20 | 21 | with open("./airflow/python.py") as f: 22 | data = f.read() 23 | 24 | print(runner.with_str(data)) 25 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = air2phin 3 | version = attr: air2phin.__version__ 4 | url = https://github.com/WhaleOps/air2phin 5 | description = Air2phin is a tool for migrating Airflow DAGs to DolphinScheduler Python API. 
6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | author = Jay Chung 9 | author_email = zhongjiajie955@gmail.com 10 | license = Apache License 2.0 11 | license_files = 12 | file: LICENSE 13 | keywords = 14 | all 15 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers 16 | classifiers = 17 | Development Status :: 3 - Alpha 18 | License :: OSI Approved :: GNU General Public License v3 (GPLv3) 19 | Operating System :: OS Independent 20 | Programming Language :: Python 21 | Programming Language :: Python :: 3 22 | Programming Language :: Python :: 3 :: Only 23 | Programming Language :: Python :: 3.6 24 | Programming Language :: Python :: 3.7 25 | Programming Language :: Python :: 3.8 26 | Programming Language :: Python :: 3.9 27 | Programming Language :: Python :: 3.10 28 | Programming Language :: Python :: 3.11 29 | Programming Language :: Python :: 3.12 30 | Programming Language :: Python :: Implementation :: CPython 31 | Environment :: Console 32 | Topic :: Software Development :: Libraries :: Python Modules 33 | project_urls = 34 | Source = https://github.com/WhaleOps/air2phin 35 | Issue Tracker = https://github.com/WhaleOps/air2phin/issues 36 | Documentation = https://air2phin.readthedocs.io 37 | Changelog = https://github.com/WhaleOps/air2phin/releases 38 | 39 | [options] 40 | python_requires = >=3.6 41 | include_package_data = True 42 | zip_safe = true 43 | platforms = any 44 | package_dir = 45 | =src 46 | packages = 47 | find_namespace: 48 | install_requires = 49 | libcst 50 | PyYaml 51 | tqdm 52 | 53 | [options.packages.find] 54 | where=src 55 | 56 | [options.package_data] 57 | air2phin = 58 | rules/**/*.yaml 59 | 60 | [options.entry_points] 61 | console_scripts = 62 | air2phin = air2phin.cli.command:main 63 | 64 | [options.extras_require] 65 | fake = 66 | sqlalchemy==2.0.4 67 | dev = 68 | # style 69 | black>=22.8 70 | flake8>=4.0 71 | flake8-docstrings>=1.6 72 | flake8-black>=0.2 73 | isort>=5.10 74 | autoflake>=1.4 75 | # test 76 | pytest>=6.2 77 | pytest-cov>=3.0 78 | # docs 79 | sphinx>=4.3 80 | sphinx_rtd_theme>=1.0 81 | sphinx-copybutton>=0.4.0 82 | sphinx-argparse>=0.3.2 83 | sphinx-github-changelog 84 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | """The script for setting up air2phin.""" 19 | import logging 20 | import os 21 | from distutils.dir_util import remove_tree 22 | from typing import List 23 | 24 | from setuptools import Command, setup 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class CleanCommand(Command): 30 | """Command to clean up python api before setup by running `python setup.py pre_clean`.""" 31 | 32 | description = "Clean up project root" 33 | user_options: List[str] = [] 34 | clean_list = [ 35 | "build", 36 | "htmlcov", 37 | "dist", 38 | ".pytest_cache", 39 | ".coverage", 40 | ] 41 | 42 | def initialize_options(self) -> None: 43 | """Set default values for options.""" 44 | 45 | def finalize_options(self) -> None: 46 | """Set final values for options.""" 47 | 48 | def run(self) -> None: 49 | """Run and remove temporary files.""" 50 | for cl in self.clean_list: 51 | if not os.path.exists(cl): 52 | logger.info("Path %s do not exists.", cl) 53 | elif os.path.isdir(cl): 54 | remove_tree(cl) 55 | else: 56 | os.remove(cl) 57 | logger.info("Finish pre_clean process.") 58 | 59 | 60 | setup( 61 | cmdclass={ 62 | "clean": CleanCommand, 63 | }, 64 | ) 65 | -------------------------------------------------------------------------------- /src/air2phin/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | __project_name__ = "Air2phin" 19 | __version__ = "1.0.0-dev" 20 | -------------------------------------------------------------------------------- /src/air2phin/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/cli/__init__.py -------------------------------------------------------------------------------- /src/air2phin/cli/command.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import difflib 3 | import logging 4 | import sys 5 | from pathlib import Path 6 | from typing import Dict, Sequence 7 | 8 | from air2phin import __project_name__, __version__ 9 | from air2phin.constants import Regexp, Token 10 | from air2phin.core.rules.config import Config 11 | from air2phin.core.rules.loader import build_in_rules, path_rule 12 | from air2phin.runner import Runner 13 | from air2phin.utils.file import recurse_files 14 | 15 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 16 | logger = logging.getLogger("air2phin") 17 | 18 | common_args: Dict[str, Dict] = { 19 | "custom_rules": { 20 | "help": f"The custom rule file path you want to add to {__project_name__}.", 21 | "action": "append", 22 | "type": Path, 23 | }, 24 | "custom_only": { 25 | "help": "Only use custom rules and ignore all built-in's, it is helpful for patching the" 26 | "exists migration.", 27 | "action": "store_true", 28 | }, 29 | "verbose": { 30 | "action": "store_true", 31 | "help": "Show more verbose output.", 32 | }, 33 | } 34 | 35 | 36 | def build_argparse() -> argparse.ArgumentParser: 37 | """Build argparse.ArgumentParser with specific configuration.""" 38 | parser = argparse.ArgumentParser( 39 | prog="air2phin", 40 | description="Air2phin is a tool for migrating Airflow DAGs to DolphinScheduler Python API.", 41 | ) 42 | 43 | # Version 44 | parser.add_argument( 45 | "-v", 46 | "--version", 47 | action="version", 48 | version=f"{__project_name__} version {__version__}", 49 | help="Show version of %(prog)s.", 50 | ) 51 | 52 | # Subcommands 53 | subparsers = parser.add_subparsers( 54 | title="subcommands", 55 | dest="subcommand", 56 | help=f"Subcommand you want to {__project_name__} to run.", 57 | ) 58 | 59 | # Test 60 | parser_test = subparsers.add_parser( 61 | "test", help=f"{__project_name__} playground for migrating with standard input." 62 | ) 63 | parser_test.add_argument( 64 | "-v", 65 | "--verbose", 66 | **common_args["verbose"], 67 | ) 68 | parser_test.add_argument( 69 | "-r", 70 | "--custom-rules", 71 | **common_args["custom_rules"], 72 | ) 73 | parser_test.add_argument( 74 | "-R", 75 | "--custom-only", 76 | **common_args["custom_only"], 77 | ) 78 | parser_test.add_argument( 79 | "-d", 80 | "--diff", 81 | action="store_true", 82 | help=f"Prints diff of all the changes {__project_name__} would make.", 83 | ) 84 | parser_test.add_argument( 85 | "stdin", 86 | help="The standard input you want to migrate.", 87 | action="store", 88 | type=str, 89 | ) 90 | 91 | # migrate 92 | parser_migrate = subparsers.add_parser( 93 | "migrate", help="Migrate Airflow DAGs to DolphinScheduler Python definition." 
94 | ) 95 | parser_migrate.add_argument( 96 | "-v", 97 | "--verbose", 98 | **common_args["verbose"], 99 | ) 100 | parser_migrate.add_argument( 101 | "-r", 102 | "--custom-rules", 103 | **common_args["custom_rules"], 104 | ) 105 | parser_migrate.add_argument( 106 | "-R", 107 | "--custom-only", 108 | **common_args["custom_only"], 109 | ) 110 | parser_migrate.add_argument( 111 | "-I", 112 | "--include", 113 | help=f"Include files based on conditions provided, default '{Regexp.PATH_PYTHON}'", 114 | action="store", 115 | default=Regexp.PATH_PYTHON, 116 | type=str, 117 | ) 118 | parser_migrate.add_argument( 119 | "-E", 120 | "--exclude", 121 | help="Exclude files based on conditions provided, without default value", 122 | action="store", 123 | type=str, 124 | ) 125 | parser_migrate.add_argument( 126 | "-i", 127 | "--inplace", 128 | help="Migrate python file in place instead of create a new file.", 129 | action="store_true", 130 | ) 131 | parser_migrate.add_argument( 132 | "-m", 133 | "--multiprocess", 134 | help="Migrate python files with multiprocess.", 135 | action="store", 136 | type=int, 137 | ) 138 | parser_migrate.add_argument( 139 | "sources", 140 | default=[Path(".")], 141 | nargs="*", 142 | help="The directories or files paths you want to migrate.", 143 | action="store", 144 | type=Path, 145 | ) 146 | 147 | # Rule 148 | parser_rule = subparsers.add_parser("rule", help="Rule of migrating.") 149 | parser_rule.add_argument( 150 | "-s", 151 | "--show", 152 | action="store_true", 153 | help=f"Show all rules for {__project_name__} migrate.", 154 | ) 155 | 156 | return parser 157 | 158 | 159 | def main(argv: Sequence[str] = None) -> None: 160 | """Run air2phin in command line.""" 161 | parser = build_argparse() 162 | argv = argv if argv is not None else sys.argv[1:] 163 | # argv = ["rule", "--show"] 164 | args = parser.parse_args(argv) 165 | 166 | if hasattr(args, "verbose") and args.verbose: 167 | logger.setLevel(logging.DEBUG) 168 | logger.debug("Finish parse air2phin arguments, current args is %s.", args) 169 | 170 | # recurse all file in given path 171 | customs_rules = [] 172 | if hasattr(args, "custom_rules") and args.custom_rules: 173 | for rule in args.custom_rules: 174 | customs_rules.extend(recurse_files(rule)) 175 | if logger.level <= logging.DEBUG and customs_rules: 176 | logger.debug( 177 | "This migration have custom rules:\n%s", 178 | Token.NEW_LINE.join((f" {r}" for r in customs_rules)), 179 | ) 180 | 181 | if args.subcommand == "test": 182 | stdin = args.stdin 183 | config = Config(customs=customs_rules, customs_only=args.custom_only) 184 | runner = Runner(config) 185 | 186 | result = runner.with_str(stdin) 187 | logger.debug("The source input is:\n%s", stdin) 188 | logger.info(f"Migrated result is: \n{result}") 189 | 190 | if args.diff: 191 | diff = difflib.unified_diff( 192 | stdin.splitlines(keepends=True), 193 | result.splitlines(keepends=True), 194 | fromfile="source", 195 | tofile="dest", 196 | ) 197 | logger.info( 198 | f"The different between source and target is: \n{''.join(diff)}" 199 | ) 200 | 201 | if args.subcommand == "migrate": 202 | migrate_files = [] 203 | for path in args.sources: 204 | migrate_files.extend(recurse_files(path, args.include, args.exclude)) 205 | 206 | config = Config( 207 | customs=customs_rules, customs_only=args.custom_only, inplace=args.inplace 208 | ) 209 | runner = Runner(config) 210 | 211 | if args.multiprocess: 212 | runner.with_files_multiprocess(migrate_files, args.multiprocess) 213 | else: 214 | runner.with_files(migrate_files) 215 | 
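    # ``rule`` subcommand: list every built-in rule file bundled with air2phin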
216 | if args.subcommand == "rule": 217 | if args.show: 218 | rules = build_in_rules() 219 | logger.info(f"Total {len(rules)} rules:\n") 220 | for rule in rules: 221 | print(rule.relative_to(path_rule)) 222 | 223 | 224 | if __name__ == "__main__": 225 | raise SystemExit(main()) 226 | -------------------------------------------------------------------------------- /src/air2phin/constants.py: -------------------------------------------------------------------------------- 1 | class Token: 2 | """Constants token for air2phin.""" 3 | 4 | QUESTION: str = "?" 5 | COMMA: str = "," 6 | POINT: str = "." 7 | SPACE: str = " " 8 | ZERO: str = "0" 9 | IMPORT: str = "import" 10 | STRING: str = "str" 11 | CODE: str = "code" 12 | NEW_LINE: str = "\n" 13 | 14 | 15 | class Keyword: 16 | """Constants keywords for air2phin.""" 17 | 18 | MIGRATE_MARK: str = "-air2phin" 19 | WORKFLOW_SUBMIT: str = "submit" 20 | AIRFLOW_DAG_SCHEDULE: str = "schedule_interval" 21 | AIRFLOW_DAG: str = "airflow.DAG" 22 | AIRFLOW_DAG_SIMPLE: str = "DAG" 23 | DEFAULT_SCHEDULE: str = "0 0 0 * * ? *" 24 | 25 | 26 | class Regexp: 27 | """Constants regular expression for air2phin.""" 28 | 29 | PATH_YAML: str = "*.yaml" 30 | PATH_PYTHON: str = "*.py" 31 | PATH_ALL: str = "**/*" 32 | 33 | 34 | class ConfigKey: 35 | """Constants config file for air2phin.""" 36 | 37 | NAME: str = "name" 38 | 39 | EXAMPLE: str = "examples" 40 | 41 | MIGRATION: str = "migration" 42 | MODULE: str = "module" 43 | PARAMETER: str = "parameter" 44 | 45 | ACTION: str = "action" 46 | SOURCE: str = "src" 47 | DESTINATION: str = "dest" 48 | 49 | KW_REPLACE: str = "replace" 50 | KW_ADD: str = "add" 51 | KW_REMOVE: str = "remove" 52 | 53 | ARGUMENT: str = "arg" 54 | DEFAULT: str = "default" 55 | TYPE: str = "type" 56 | VALUE: str = "value" 57 | 58 | 59 | class Number: 60 | """Constants number for air2phin.""" 61 | 62 | SCHEDULE_TOTAL_NUM: int = 9 63 | SCHEDULE_SPACE_NUM: int = 4 64 | -------------------------------------------------------------------------------- /src/air2phin/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | -------------------------------------------------------------------------------- /src/air2phin/core/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/core/rules/__init__.py -------------------------------------------------------------------------------- /src/air2phin/core/rules/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import warnings 3 | from pathlib import Path 4 | from typing import Any, Dict, List, NamedTuple, Optional 5 | 6 | from air2phin.constants import ConfigKey, Regexp, Token 7 | from air2phin.core.rules.loader import rule_calls, rule_imports 8 | from air2phin.utils.file import read_yaml, recurse_files 9 | 10 | logger = logging.getLogger("air2phin.config") 11 | 12 | 13 | class ParamDefaultConfig(NamedTuple): 14 | """Default statement config.""" 15 | 16 | type: str 17 | value: str 18 | 19 | 20 | class CallConfig(NamedTuple): 21 | """Call config.""" 22 | 23 | long: str 24 | short: str 25 | src_long: str 26 | src_short: str 27 | replace: Dict[str, str] 28 | add: Dict[str, ParamDefaultConfig] 29 | remove: List[str] 30 | 31 | 32 | class ImportConfig(NamedTuple): 33 | """Import config.""" 34 | 35 | replace: str 36 | add: List[str] 37 | remove: bool 38 | 39 | 40 | class Config: 41 | """Configurations of air2phin, including all configs change behavior of air2phin. 42 | 43 | :param customs: User custom path of rules, will combine with build-in rules when :param:``customs_only`` 44 | is False, will only use custom rules and ignore build-in rules when :param:``customs_only`` is True. 45 | :param customs_only: Only use custom rules or not. 46 | :param inplace: Replace source python file inplace instead of create a new file. 47 | :param imports: Build-in imports rules path. 48 | :param calls: Build-in call rules path. 49 | """ 50 | 51 | def __init__( 52 | self, 53 | customs: Optional[List[Path]] = None, 54 | customs_only: Optional[bool] = False, 55 | inplace: Optional[bool] = False, 56 | imports: Optional[List[Path]] = rule_imports, 57 | calls: Optional[List[Path]] = rule_calls, 58 | ): 59 | self._customs = customs 60 | self.customs_only = customs_only 61 | if self.customs_only and not self._customs: 62 | raise ValueError( 63 | "Argument `customs` not allow value None, when customs_only is True." 64 | ) 65 | if self.customs_only: 66 | warnings.warn( 67 | "Will only use customs rules to migration, will ignore built-in rules.", 68 | UserWarning, 69 | stacklevel=2, 70 | ) 71 | self.inplace = inplace 72 | self._imports = imports 73 | self._calls = calls 74 | # Want to be compatible with python 3.6 and python 3.7, so can not use 75 | # ``from functools import cached_property`` 76 | self._call_migrator: Dict[str, CallConfig] | None = None 77 | self._import_migrator: Dict[str, ImportConfig] | None = None 78 | 79 | @property 80 | def imports_path(self) -> List[Path]: 81 | """Get all imports path for migration rules, the built-in rules before custom rules. 82 | 83 | Will only use :param:``customs`` rules and ignore built-in rules when :param:``customs_only`` is True, 84 | and combine :param:``customs``, built-in rules when :param:``customs_only`` is False. 
85 | """ 86 | if self.customs_only: 87 | return self._customs 88 | 89 | if not self._customs: 90 | return self._imports 91 | self._imports.extend(self._customs) 92 | return self._imports 93 | 94 | @property 95 | def imports(self) -> Dict[str, ImportConfig]: 96 | """Get all import migrator from rules.""" 97 | if self._import_migrator: 98 | return self._import_migrator 99 | self._import_migrator = self.imp_migrator() 100 | return self._import_migrator 101 | 102 | @property 103 | def calls_path(self) -> List[Path]: 104 | """Get all call path for migration rules, the built-in rules before custom rules. 105 | 106 | Will only use :param:``customs`` rules and ignore built-in rules when :param:``customs_only`` is True, 107 | and combine :param:``customs``, built-in rules when :param:``customs_only`` is False. 108 | """ 109 | if self.customs_only: 110 | return self._customs 111 | 112 | if not self._customs: 113 | return self._calls 114 | self._calls.extend(self._customs) 115 | return self._calls 116 | 117 | @property 118 | def calls(self) -> Dict[str, CallConfig]: 119 | """Get all call migrator from rules.""" 120 | if self._call_migrator: 121 | return self._call_migrator 122 | self._call_migrator = self.call_migrator() 123 | return self._call_migrator 124 | 125 | @staticmethod 126 | def _build_caller( 127 | src: str, dest: str, parameters: List[Dict[str, Any]] 128 | ) -> CallConfig: 129 | replace = dict() 130 | add = dict() 131 | remove = [] 132 | 133 | if parameters: 134 | for p in parameters: 135 | if p[ConfigKey.ACTION] == ConfigKey.KW_REPLACE: 136 | replace[p[ConfigKey.SOURCE]] = p[ConfigKey.DESTINATION] 137 | elif p[ConfigKey.ACTION] == ConfigKey.KW_ADD: 138 | add[p[ConfigKey.ARGUMENT]] = ParamDefaultConfig( 139 | type=p[ConfigKey.DEFAULT][ConfigKey.TYPE], 140 | value=p[ConfigKey.DEFAULT][ConfigKey.VALUE], 141 | ) 142 | elif p[ConfigKey.ACTION] == ConfigKey.KW_REMOVE: 143 | remove.append(p[ConfigKey.ARGUMENT]) 144 | else: 145 | raise ValueError( 146 | f"Unknown action type {p[ConfigKey.ACTION]} in {p}" 147 | ) 148 | 149 | return CallConfig( 150 | long=dest, 151 | short=dest.split(Token.POINT)[-1], 152 | src_long=src, 153 | src_short=src.split(Token.POINT)[-1], 154 | replace=replace, 155 | add=add, 156 | remove=remove, 157 | ) 158 | 159 | @staticmethod 160 | def get_module_action( 161 | migration: Dict[str, Any], action_type: str 162 | ) -> Optional[Dict[str, Any]]: 163 | """Get specific action type from rules. 164 | 165 | :param migration: Config Migration node. 166 | :param action_type: Action type, can be `add`, `remove`, `replace`. 167 | """ 168 | actions = [ 169 | action 170 | for action in migration[ConfigKey.MODULE] 171 | if action[ConfigKey.ACTION] == action_type 172 | ] 173 | if len(actions) > 1: 174 | raise ValueError("Each type of action can only have one.") 175 | return actions[0] if actions else None 176 | 177 | @staticmethod 178 | def rules_override(rule_paths: List[Path]) -> List[Dict]: 179 | """Handle rules override, override the previous rules by the latest one when have the same name. 180 | 181 | Use dict comprehension to overwrite built-in rules, if custom rules also have the same name rules, 182 | will use the latest rules pass to :class:`Config`. 
183 | """ 184 | rules_map = {} 185 | 186 | rule_files = [] 187 | for path in rule_paths: 188 | rule_files.extend(recurse_files(path, include=Regexp.PATH_YAML)) 189 | 190 | for filename in rule_files: 191 | content = read_yaml(filename) 192 | rule_name = content.get(ConfigKey.NAME) 193 | if rule_name in rules_map: 194 | logger.info( 195 | "Rule name with %s will be override by file %s", rule_name, filename 196 | ) 197 | rules_map[rule_name] = content 198 | return list(rules_map.values()) 199 | 200 | def call_migrator(self) -> Dict[str, CallConfig]: 201 | """Get all call migrator from rules.""" 202 | migrator = {} 203 | 204 | for rule in self.rules_override(self.calls_path): 205 | migration = rule[ConfigKey.MIGRATION] 206 | parameters = migration.get(ConfigKey.PARAMETER, None) 207 | replace = self.get_module_action(migration, ConfigKey.KW_REPLACE) 208 | if replace is None: 209 | continue 210 | src = replace[ConfigKey.SOURCE] 211 | dest = replace[ConfigKey.DESTINATION] 212 | 213 | if isinstance(src, str): 214 | migrator[src] = self._build_caller(src, dest, parameters) 215 | elif isinstance(src, list): 216 | for inner_src in src: 217 | migrator[inner_src] = self._build_caller( 218 | inner_src, dest, parameters 219 | ) 220 | else: 221 | raise RuntimeError("Invalid migration.module.src type: %s" % type(src)) 222 | return migrator 223 | 224 | @staticmethod 225 | def _build_replace_importer(action: Dict[str, Any]) -> Optional[str]: 226 | if action is None: 227 | return None 228 | dest = action[ConfigKey.DESTINATION] 229 | module, asname = dest.rsplit(Token.POINT, 1) 230 | return f"from {module} import {asname}" 231 | 232 | @staticmethod 233 | def _get_rp_add_action(action: Dict[str, Any]) -> List[str]: 234 | """Get replace and add action list from rules. 235 | 236 | :param action: Config migration module action. 
237 | """ 238 | 239 | def _build_import_statement(mod: str) -> str: 240 | spec = mod.rsplit(Token.POINT, 1) 241 | return f"from {spec[0]} import {spec[1]}" 242 | 243 | if action is None: 244 | return [] 245 | module = action[ConfigKey.MODULE] 246 | if isinstance(module, str): 247 | return [_build_import_statement(module)] 248 | elif isinstance(module, list): 249 | return [_build_import_statement(mod) for mod in module] 250 | else: 251 | raise RuntimeError( 252 | "Invalid migration.module.action.module type: %s" % type(module) 253 | ) 254 | 255 | @staticmethod 256 | def _build_remove_importer(action: Dict[str, Any]) -> bool: 257 | if action is None or ConfigKey.MODULE not in action: 258 | return False 259 | return True 260 | 261 | def imp_migrator(self) -> Dict[str, ImportConfig]: 262 | """Get all import migrator from rules.""" 263 | imps = {} 264 | 265 | for rule in self.rules_override(self.imports_path): 266 | replace = self.get_module_action( 267 | rule[ConfigKey.MIGRATION], ConfigKey.KW_REPLACE 268 | ) 269 | add = self.get_module_action(rule[ConfigKey.MIGRATION], ConfigKey.KW_ADD) 270 | remove = self.get_module_action( 271 | rule[ConfigKey.MIGRATION], ConfigKey.KW_REMOVE 272 | ) 273 | 274 | qualname = ( 275 | replace[ConfigKey.SOURCE] if replace else remove[ConfigKey.MODULE] 276 | ) 277 | if isinstance(qualname, str): 278 | imps[qualname] = ImportConfig( 279 | replace=self._build_replace_importer(replace), 280 | add=self._get_rp_add_action(add), 281 | remove=self._build_remove_importer(remove), 282 | ) 283 | elif isinstance(qualname, list): 284 | for inner_src in qualname: 285 | imps[inner_src] = ImportConfig( 286 | replace=self._build_replace_importer(replace), 287 | add=self._get_rp_add_action(add), 288 | remove=self._build_remove_importer(remove), 289 | ) 290 | 291 | return imps 292 | -------------------------------------------------------------------------------- /src/air2phin/core/rules/loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | project = Path(__file__).parent.parent.parent 5 | 6 | path_rule = project.joinpath("rules") 7 | path_operators = path_rule.joinpath("operators") 8 | path_hooks = path_rule.joinpath("hooks") 9 | path_models = path_rule.joinpath("models") 10 | path_utils = path_rule.joinpath("utils") 11 | path_dag_cnx = path_rule.joinpath("core") 12 | 13 | rule_imports = [path_dag_cnx, path_operators, path_hooks, path_models, path_utils] 14 | 15 | rule_calls = [path_dag_cnx, path_operators, path_hooks, path_models] 16 | 17 | 18 | def build_in_rules() -> List[Path]: 19 | """Get all build-in rules in air2phin.rules directory.""" 20 | return [path for path in path_rule.glob("**/*") if path.is_file()] 21 | -------------------------------------------------------------------------------- /src/air2phin/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/core/transformer/__init__.py -------------------------------------------------------------------------------- /src/air2phin/core/transformer/imports.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | from typing import Optional, Sequence, Union 18 | 19 | import libcst as cst 20 | import libcst.matchers as m 21 | from libcst import FlattenSentinel, RemovalSentinel 22 | 23 | from air2phin.constants import Token 24 | from air2phin.core.rules.config import Config, ImportConfig 25 | 26 | 27 | class ImportTransformer(cst.CSTTransformer): 28 | """CST Transformer for airflow operators.""" 29 | 30 | def __init__(self, config: Config): 31 | super().__init__() 32 | self.config: Config = config 33 | self.mod_ref = None 34 | self.class_names = [] 35 | 36 | def _get_attr_nested_value(self, node: cst.Attribute) -> str: 37 | if m.matches(node.value, m.TypeOf(m.Name)): 38 | return f"{node.value.value}.{node.attr.value}" 39 | elif m.matches(node.value, m.TypeOf(m.Attribute)): 40 | nested = self._get_attr_nested_value( 41 | cst.ensure_type(node.value, cst.Attribute) 42 | ) 43 | return f"{nested}.{node.attr.value}" 44 | 45 | def visit_ImportFrom(self, node: cst.ImportFrom) -> Optional[bool]: 46 | # case ``from modules import class`` 47 | if m.matches(node.module, m.TypeOf(m.Name)): 48 | self.mod_ref = node.module.value 49 | # case ``from package.module.[module1] import class`` 50 | elif m.matches(node.module, m.TypeOf(m.Attribute)): 51 | self.mod_ref = self._get_attr_nested_value( 52 | cst.ensure_type(node.module, cst.Attribute) 53 | ) 54 | 55 | # skip ``import *``, aka ImportStar 56 | if isinstance(node.names, Sequence): 57 | self.class_names = [ 58 | cst.ensure_type(ia, cst.ImportAlias).name.value for ia in node.names 59 | ] 60 | return False 61 | 62 | def leave_ImportFrom( 63 | self, original_node: cst.ImportFrom, updated_node: cst.ImportFrom 64 | ) -> Union[ 65 | cst.BaseSmallStatement, FlattenSentinel[cst.BaseSmallStatement], RemovalSentinel 66 | ]: 67 | if self.mod_ref is not None: 68 | src_full_refs = [ 69 | f"{self.mod_ref}.{class_name}" for class_name in self.class_names 70 | ] 71 | 72 | replaces = [] 73 | adds = set() 74 | remove = set() 75 | for full_ref in src_full_refs: 76 | if full_ref in self.config.imports: 77 | dest: ImportConfig = self.config.imports[full_ref] 78 | if dest.remove: 79 | remove.update(full_ref) 80 | replaces.append(dest.replace) 81 | adds.update(dest.add) 82 | 83 | # remove remove statement 84 | if remove: 85 | return cst.RemoveFromParent() 86 | 87 | # get replace statement 88 | if len(replaces) == 0: 89 | return updated_node 90 | elif len(replaces) == 1: 91 | statement = replaces[0] 92 | else: 93 | class_name_only = [ 94 | stat.split(f" {Token.IMPORT} ")[1] for stat in replaces[1:] 95 | ] 96 | statement = f"{Token.COMMA} ".join(replaces[:1] + class_name_only) 97 | 98 | # Return replace and add statement 99 | # TODO, will use ; as separator of multiple statements, we should better use \n in the future 100 | return FlattenSentinel( 101 | [ 102 | *[cst.parse_statement(add).body[0] for add in adds], 103 | cst.parse_statement(statement).body[0], 104 | ] 105 | ) 
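        # ``self.mod_ref`` is unset (e.g. for relative imports), so the statement is kept unchanged below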
106 | return updated_node 107 | -------------------------------------------------------------------------------- /src/air2phin/core/transformer/operators.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Optional, Sequence, Union 3 | 4 | import libcst as cst 5 | import libcst.matchers as m 6 | from libcst import Arg, BaseExpression, FlattenSentinel, RemovalSentinel 7 | 8 | from air2phin.constants import Keyword, Token 9 | from air2phin.core.rules.config import CallConfig, Config, ParamDefaultConfig 10 | from air2phin.utils import string 11 | 12 | 13 | class OpTransformer(cst.CSTTransformer): 14 | """CST Transformer for airflow operators. 15 | 16 | TODO Need to skip inner call like DAG(date_time=datetime.datetime.now().strftime("%Y-%m-%d")) 17 | 18 | :param qualified_name: qualified name of operator 19 | """ 20 | 21 | def __init__(self, config: Config, qualified_name: Optional[str] = None): 22 | super().__init__() 23 | self._config: Config = config 24 | self.qualified_name = qualified_name 25 | assert self.qualified_name is not None 26 | self.visit_name = False 27 | self.migrated_param = set() 28 | 29 | @property 30 | def config(self) -> CallConfig: 31 | return self._config.calls.get(self.qualified_name) 32 | 33 | def matcher_op_name(self, node: cst.Name) -> bool: 34 | if self.visit_name is False and node.value == self.config.src_short: 35 | self.visit_name = True 36 | return True 37 | return False 38 | 39 | def match_replace_name(self, node: cst.Arg) -> bool: 40 | migrate_names = self.config.replace.keys() 41 | return m.matches( 42 | node, 43 | m.Arg(keyword=m.Name(m.MatchIfTrue(lambda name: name in migrate_names))), 44 | ) 45 | 46 | def match_remove_name(self, node: cst.Arg) -> bool: 47 | return m.matches( 48 | node, 49 | m.Arg( 50 | keyword=m.Name(m.MatchIfTrue(lambda name: name in self.config.remove)) 51 | ), 52 | ) 53 | 54 | def match_call_name(self, node: cst.Call) -> bool: 55 | if m.matches(node.func, m.TypeOf(m.Name)): 56 | val = cst.ensure_type(node.func, cst.Name).value 57 | elif m.matches(node.func, m.TypeOf(m.Attribute)): 58 | val = cst.ensure_type(node.func, cst.Attribute).attr.value 59 | else: 60 | return True 61 | return val in self.qualified_name.split(Token.POINT) 62 | 63 | def _handle_specific_args(self, node: cst.Arg) -> cst.Arg: 64 | """Handle specific args for custom rule. 65 | 66 | Including: 67 | * airflow.DAG.schedule_interval: migrate schedule value 68 | """ 69 | name = node.keyword.value 70 | if ( 71 | Keyword.AIRFLOW_DAG in self.qualified_name 72 | and name == Keyword.AIRFLOW_DAG_SCHEDULE 73 | ): 74 | if not m.matches( 75 | node, 76 | m.Arg(value=m.SimpleString()), 77 | ): 78 | return node.with_changes( 79 | value=cst.SimpleString(f"'{Keyword.DEFAULT_SCHEDULE}'") 80 | ) 81 | 82 | orig_value = cst.ensure_type(node.value, cst.SimpleString).value 83 | value = string.convert_schedule(orig_value.strip("'").strip('"')) 84 | return node.with_changes(value=cst.SimpleString(value=f"'{value}'")) 85 | return node 86 | 87 | def leave_Name( 88 | self, original_node: cst.Name, updated_node: cst.Name 89 | ) -> "BaseExpression": 90 | """Handle callable name according to configuration. 91 | 92 | Which include the following steps: 93 | * Change callable name. 
94 | """ 95 | if self.matcher_op_name(original_node): 96 | dest_name = self.config.short 97 | return updated_node.with_changes(value=dest_name) 98 | return updated_node 99 | 100 | def leave_Arg( 101 | self, original_node: cst.Arg, updated_node: cst.Arg 102 | ) -> Union[Arg, FlattenSentinel[cst.Arg], RemovalSentinel]: 103 | """Handle callable argument name according to configuration. 104 | 105 | Which include the following steps: 106 | * Change argument name. 107 | * Replace airflow.DAG argument ``schedule_interval`` to dolphinscheduler style. 108 | """ 109 | if self.match_remove_name(original_node): 110 | return cst.RemoveFromParent() 111 | 112 | if original_node.keyword: 113 | updated_node = self._handle_specific_args(updated_node) 114 | 115 | if self.match_replace_name(original_node): 116 | original_keyword = original_node.keyword.value 117 | dest_keyword = self.config.replace.get(original_keyword) 118 | 119 | self.migrated_param.add(dest_keyword) 120 | return updated_node.with_changes( 121 | keyword=cst.Name(value=dest_keyword), value=updated_node.value 122 | ) 123 | 124 | return updated_node 125 | 126 | def _handle_missing_default(self, nodes: Sequence[cst.Arg]) -> Sequence[cst.Arg]: 127 | mutable = list(nodes) 128 | one_of = copy.deepcopy(mutable[-1]) 129 | for arg in self.config.add.keys(): 130 | default: ParamDefaultConfig = self.config.add.get(arg) 131 | 132 | if default.type == Token.STRING: 133 | value = cst.SimpleString(value=f'"{default.value}"') 134 | elif default.type == Token.CODE: 135 | value = cst.parse_expression(default.value) 136 | else: 137 | raise NotImplementedError 138 | mutable.append( 139 | one_of.with_changes( 140 | value=value, 141 | keyword=cst.Name(value=arg), 142 | ) 143 | ) 144 | return mutable 145 | 146 | def leave_Call( 147 | self, original_node: cst.Call, updated_node: cst.Call 148 | ) -> BaseExpression: 149 | if not self.config.add: 150 | return updated_node 151 | 152 | if not self.match_call_name(original_node): 153 | return updated_node 154 | 155 | return updated_node.with_changes( 156 | args=self._handle_missing_default(updated_node.args) 157 | ) 158 | -------------------------------------------------------------------------------- /src/air2phin/core/transformer/route.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | import warnings 18 | from typing import List, Set, Union 19 | 20 | import libcst as cst 21 | import libcst.matchers as m 22 | from libcst import BaseExpression, FlattenSentinel, RemovalSentinel, SimpleStatementLine 23 | from libcst.metadata import PositionProvider, QualifiedName, QualifiedNameProvider 24 | 25 | from air2phin.constants import Keyword 26 | from air2phin.core.rules.config import Config 27 | from air2phin.core.transformer.imports import ImportTransformer 28 | from air2phin.core.transformer.operators import OpTransformer 29 | 30 | 31 | class Transformer(cst.CSTTransformer): 32 | """CST Transformer route class from airflow to dolphinscheduler-sdk-python. 33 | 34 | The main class to call each rules to migrate, just like a router, currently will route to `imports` and 35 | `operators` transformer. 36 | 37 | :param config: libCST transformer configuration, use it to get importer and callable migrate setting. 38 | """ 39 | 40 | METADATA_DEPENDENCIES = ( 41 | QualifiedNameProvider, 42 | PositionProvider, 43 | ) 44 | 45 | def __init__(self, config: Config): 46 | super().__init__() 47 | self.config: Config = config 48 | self.workflow_alias = set() 49 | self.have_submit_expr = set() 50 | 51 | @staticmethod 52 | def _get_qualified_name(qualifie: Set[QualifiedName]) -> str: 53 | if len(qualifie) > 1: 54 | warnings.warn( 55 | "QualifiedNameProvider get more than one qualified name, will use the first one.", 56 | RuntimeWarning, 57 | ) 58 | for q in qualifie: 59 | return q.name 60 | 61 | # @m.call_if_inside( 62 | # m.Call( 63 | # func=m.MatchMetadataIfTrue( 64 | # meta.QualifiedNameProvider, 65 | # lambda qualnames: any( 66 | # n.name in call_cov for n in qualnames 67 | # ), 68 | # ) 69 | # ) 70 | # ) 71 | def leave_Call( 72 | self, original_node: cst.Call, updated_node: cst.Call 73 | ) -> BaseExpression: 74 | qnp = self.get_metadata(QualifiedNameProvider, original_node) 75 | if qnp: 76 | qnp_name = self._get_qualified_name(qnp) 77 | if qnp_name in self.config.calls: 78 | return updated_node.visit(OpTransformer(self.config, qnp_name)) 79 | return updated_node 80 | 81 | def leave_ImportFrom( 82 | self, original_node: cst.ImportFrom, updated_node: cst.ImportFrom 83 | ) -> Union[ 84 | cst.BaseSmallStatement, FlattenSentinel[cst.BaseSmallStatement], RemovalSentinel 85 | ]: 86 | """Migrate from import statement.""" 87 | return updated_node.visit(ImportTransformer(self.config)) 88 | 89 | def leave_WithItem_asname(self, node: cst.WithItem) -> None: 90 | """Get airflow Dags alias names.""" 91 | if m.matches(node.item, m.Call()) and m.matches( 92 | cst.ensure_type(node.item, cst.Call).func, 93 | m.Name(value=Keyword.AIRFLOW_DAG_SIMPLE), 94 | ): 95 | self.workflow_alias.add(node.asname.name.value) 96 | 97 | def leave_Expr( 98 | self, original_node: cst.Expr, updated_node: cst.Expr 99 | ) -> Union[ 100 | cst.BaseSmallStatement, 101 | cst.FlattenSentinel[cst.BaseSmallStatement], 102 | RemovalSentinel, 103 | ]: 104 | """Update workflow ``alias.submit()`` expr exits or not statement.""" 105 | if m.matches( 106 | original_node.value, 107 | m.Call( 108 | func=m.Attribute( 109 | value=m.OneOf(*[m.Name(a) for a in self.workflow_alias]), 110 | attr=m.Name(Keyword.WORKFLOW_SUBMIT), 111 | ) 112 | ), 113 | ): 114 | self.have_submit_expr.add(original_node.value.func.value.value) 115 | return updated_node 116 | 117 | def _build_submit_exprs(self) -> List[SimpleStatementLine]: 118 | miss_alias = self.workflow_alias.difference(self.have_submit_expr) 119 | return [ 120 | 
            cst.parse_statement(f"{alias}.{Keyword.WORKFLOW_SUBMIT}()")
121 |             for alias in miss_alias
122 |         ]
123 | 
124 |     def leave_Module(
125 |         self, original_node: cst.Module, updated_node: cst.Module
126 |     ) -> cst.Module:
127 |         if self.have_submit_expr == self.workflow_alias:
128 |             return updated_node
129 | 
130 |         # add submit expr for workflows that do not have one yet
131 |         body_with_submit = list(updated_node.body)
132 |         body_with_submit.extend(self._build_submit_exprs())
133 |         return updated_node.with_changes(body=body_with_submit)
134 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/fake/__init__.py
--------------------------------------------------------------------------------
/src/air2phin/fake/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/fake/core/__init__.py
--------------------------------------------------------------------------------
/src/air2phin/fake/core/connection.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 | 
3 | 
4 | class Connection(NamedTuple):
5 |     """Connection info stored in the dolphinscheduler metadata database."""
6 | 
7 |     host: str
8 |     port: int
9 |     schema: str
10 |     login: str
11 |     password: str
12 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/core/hook.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | from contextlib import closing
5 | from typing import Any, Callable, Iterable, List, Mapping, Optional, Tuple, Union
6 | 
7 | import sqlparse
8 | from sqlalchemy import create_engine, text
9 | 
10 | from air2phin.constants import Token
11 | from air2phin.fake.core.connection import Connection
12 | 
13 | 
14 | def fetch_all_handler(cursor) -> Optional[List[Tuple]]:
15 |     """Handler for DbApiHook.run() to return results."""
16 |     if cursor.description is not None:
17 |         return cursor.fetchall()
18 |     else:
19 |         return None
20 | 
21 | 
22 | class BaseHook:
23 |     """Base hook for all fake hooks.
24 | 
25 |     This hook is a fake of the Airflow base hook, allowing users to keep their code unchanged while
26 |     using a dolphinscheduler datasource connection to connect to a specific datasource.
27 | 
28 |     :param connection: specific hook connection. :class:``air2phin.fake.core.connection.Connection`` object.
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         conn_name_attr: Optional[str] = None,
34 |         connection: Optional[Connection] = None,
35 |     ):
36 |         self.conn_name_attr = conn_name_attr
37 |         self.connection = connection
38 | 
39 |     def get_conn(self):
40 |         """Get hook connection object, whose concrete type depends on the subclass."""
41 |         if self.connection:
42 |             return self.connection
43 |         elif self.conn_name_attr:
44 |             self.connection = self.get_connection(self.conn_name_attr)
45 |             return self.connection
46 |         else:
47 |             raise ValueError(
48 |                 "Can not get connection, neither parameter ``conn_name_attr`` "
49 |                 "nor ``connection`` provided."
50 |             )
51 | 
52 |     @staticmethod
53 |     def parser_conn_namedtuple(connection_params: str) -> Connection:
54 |         """Parse dolphinscheduler connection_params to Connection.
55 | 
56 |         :param connection_params: connection_params from dolphinscheduler datasource.
57 |         """
58 |         data = json.loads(connection_params)
59 | 
60 |         pattern = re.compile(
61 |             "jdbc:.*://(?P<host>[\\w\\W]+):(?P<port>\\d+)/(?P<database>\\w+)(\\?|$)"
62 |         )
63 |         # the key is named "url" instead of "jdbcUrl" when using the seatunnel datasource connector
64 |         try:
65 |             pattern_match = pattern.match(
66 |                 data.get("jdbcUrl", data.get("url", None))
67 |             ).groupdict()
68 |         except Exception:
69 |             raise ValueError(
70 |                 f"Can not parse connection params, make sure the connection is in format "
71 |                 f"`jdbc:dbtype://host:port/database`, the only format air2phin accepts, "
72 |                 f"currently it is: {connection_params}"
73 |             )
74 | 
75 |         return Connection(
76 |             host=pattern_match.get("host", None),
77 |             port=int(pattern_match.get("port", None)),
78 |             schema=pattern_match.get("database", None),
79 |             login=data.get("user", None),
80 |             password=data.get("password", None),
81 |         )
82 | 
83 |     @staticmethod
84 |     def _get_type_name(conn_id) -> Tuple[Any, str]:
85 |         if Token.POINT in conn_id:
86 |             return conn_id.strip().split(Token.POINT)
87 |         return None, conn_id.strip()
88 | 
89 |     @classmethod
90 |     def _get_connection_params_from_env(
91 |         cls, metadata_conn: str, conn_id: str
92 |     ) -> Connection:
93 |         sql_qry_type_name = (
94 |             "SELECT connection_params FROM t_ds_datasource WHERE type = {type} and "
95 |             "name = '{name}'"
96 |         )
97 |         sql_qry_name = (
98 |             "SELECT connection_params FROM t_ds_datasource WHERE name = '{name}'"
99 |         )
100 | 
101 |         database_type_map = dict(
102 |             mysql=0,
103 |             postgresql=1,
104 |             hive=2,
105 |             spark=3,
106 |             clickhouse=4,
107 |             oracle=5,
108 |             sqlserver=6,
109 |             db2=7,
110 |             presto=8,
111 |             h2=9,
112 |             redshift=10,
113 |             dameng=11,
114 |             starrocks=12,
115 |         )
116 | 
117 |         datasource_type, datasource_name = cls._get_type_name(conn_id)
118 |         engine = create_engine(metadata_conn, echo=True)
119 | 
120 |         with engine.connect() as conn:
121 |             # conn_id not in format of datasource_type.datasource_name
122 |             if Token.POINT not in conn_id:
123 |                 result_name = conn.execute(
124 |                     text(sql_qry_name.format(name=datasource_name))
125 |                 )
126 |                 if result_name.rowcount == 0:
127 |                     raise ValueError(
128 |                         f"Connection {conn_id} not found in dolphinscheduler metadata database."
129 |                     )
130 |                 elif result_name.rowcount > 1:
131 |                     raise ValueError(
132 |                         f"Connection {conn_id} is not unique in dolphinscheduler metadata database, please "
133 |                         f"use ``datasource_type.datasource_name`` to specify."
134 |                     )
135 |                 record = result_name.fetchone()
136 |                 return cls.parser_conn_namedtuple(record[0])
137 | 
138 |             # conn_id in format of datasource_type.datasource_name
139 |             if datasource_type.lower() not in database_type_map:
140 |                 raise ValueError(
141 |                     f"Datasource type `{datasource_type}` is not supported currently, please use one of "
142 |                     f"{list(database_type_map.keys())}"
143 |                 )
144 |             result_type_name = conn.execute(
145 |                 text(
146 |                     sql_qry_type_name.format(
147 |                         type=database_type_map.get(datasource_type.lower()),
148 |                         name=datasource_name,
149 |                     )
150 |                 )
151 |             )
152 |             if result_type_name.rowcount == 0:
153 |                 raise ValueError(
154 |                     f"Connection {conn_id} not found in dolphinscheduler metadata database."
155 |                 )
156 |             record = result_type_name.fetchone()
157 |             return cls.parser_conn_namedtuple(record[0])
158 | 
159 |     @classmethod
160 |     def get_connection(cls, conn_id: str) -> Connection:
161 |         """Get connection from dolphinscheduler metadata database.
162 | 
163 |         This method is a fake of the Airflow connection ``get_connection`` method, allowing users to keep
164 |         their code unchanged while using a dolphinscheduler datasource; it returns a
165 |         :class:``air2phin.fake.core.connection.Connection`` object.
166 | 
167 |         :param conn_id: connection id, if in format of datasource_type.datasource_name, will query by type
168 |             and name; if only datasource_name is given, will query by name only.
169 |         """
170 |         try:
171 |             from pydolphinscheduler.models.datasource import Datasource
172 | 
173 |             datasource_type, datasource_name = cls._get_type_name(conn_id)
174 |             database: Datasource = Datasource.get(
175 |                 datasource_name=datasource_name, datasource_type=datasource_type
176 |             )
177 |             return cls.parser_conn_namedtuple(database.connection_params)
178 |         except ImportError:
179 |             metadata_conn = os.environ.get("AIR2PHIN_FAKE_CONNECTION", None)
180 |             if metadata_conn is None:
181 |                 raise ValueError(
182 |                     "Can not get dolphinscheduler metadata connection information, neither package "
183 |                     "``pydolphinscheduler`` is installed nor environment variable ``AIR2PHIN_FAKE_CONNECTION`` "
184 |                     "is set, please provide one of them to keep going."
185 |                 )
186 |             return cls._get_connection_params_from_env(metadata_conn, conn_id)
187 | 
188 |     def _run_command(self, cur, sql_statement, parameters):
189 |         """Execute a single SQL statement, with optional bind parameters."""
190 |         if parameters:
191 |             cur.execute(sql_statement, parameters)
192 |         else:
193 |             cur.execute(sql_statement)
194 | 
195 |     def run(
196 |         self,
197 |         sql: Union[str, Iterable[str]],
198 |         autocommit: bool = False,
199 |         parameters: Union[Iterable, Mapping, None] = None,
200 |         handler: Union[Callable, None] = None,
201 |         split_statements: bool = False,
202 |         return_last: bool = True,
203 |     ) -> Union[Any, List[Any], None]:
204 |         """Mock SQL run command, mirroring Airflow's ``DbApiHook.run``."""
205 |         scalar_return_last = isinstance(sql, str) and return_last
206 |         if isinstance(sql, str):
207 |             if split_statements:
208 |                 splits = sqlparse.split(sqlparse.format(sql, strip_comments=True))
209 |                 sql: List[str] = list(filter(None, splits))
210 |             else:
211 |                 sql = [sql]
212 | 
213 |         if not sql:
214 |             raise ValueError("List of SQL statements is empty")
215 | 
216 |         with closing(self.get_conn()) as conn:
217 |             with closing(conn.cursor()) as cur:
218 |                 results = []
219 |                 for sql_statement in sql:
220 |                     self._run_command(cur, sql_statement, parameters)
221 | 
222 |                     if handler is not None:
223 |                         result = handler(cur)
224 |                         results.append(result)
225 | 
226 |             if autocommit:
227 |                 conn.commit()
228 | 
229 |         if handler is None:
230 |             return None
231 |         elif scalar_return_last:
232 |             return results[-1]
233 |         else:
234 |             return results
235 | 
236 |     @staticmethod
237 |     def fetch_all_handler(cursor) -> Union[List[Tuple], None]:
238 |         """Handler for DbApiHook.run() to return results."""
239 |         if cursor.description is not None:
240 |             return cursor.fetchall()
241 |         else:
242 |             return None
243 | 
244 |     def get_records(
245 |         self,
246 |         sql: Union[str, List[str]],
247 |         parameters: Union[Iterable, Mapping, None] = None,
248 |     ) -> Any:
249 |         """Mock executing sql and returning the fetched records."""
250 |         return self.run(sql=sql, parameters=parameters, handler=fetch_all_handler)
251 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/hooks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/fake/hooks/__init__.py
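A minimal usage sketch of the fake hooks defined above, as they would run after a DAG has been migrated. The datasource name "postgres_default" and the metadata URI below are illustrative assumptions rather than values shipped with air2phin; running it requires psycopg2, SQLAlchemy, and a reachable DolphinScheduler metadata database:

    import os

    # Assumed example URI; point this at your real DolphinScheduler metadata database.
    os.environ["AIR2PHIN_FAKE_CONNECTION"] = (
        "postgresql+psycopg2://user:pass@127.0.0.1:5432/dolphinscheduler"
    )

    from air2phin.fake.hooks.postgres import PostgresHook

    # "postgres_default" is a hypothetical datasource name registered in DolphinScheduler.
    hook = PostgresHook("postgres_default")
    # get_records() goes through BaseHook.run(), with fetch_all_handler collecting the rows.
    rows = hook.get_records("SELECT 1")

This relies only on the ``AIR2PHIN_FAKE_CONNECTION`` fallback documented in ``BaseHook.get_connection`` above; with ``pydolphinscheduler`` installed the environment variable is not needed.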
--------------------------------------------------------------------------------
/src/air2phin/fake/hooks/mysql.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | from air2phin.fake.core.connection import Connection
4 | from air2phin.fake.core.hook import BaseHook
5 | 
6 | 
7 | class MySqlHook(BaseHook):
8 |     """Interact with MySQL.
9 | 
10 |     This hook is a fake of the Airflow MySQL hook, allowing users to keep their code unchanged while
11 |     using a dolphinscheduler datasource connection to connect to a MySQL database.
12 | 
13 |     :param connection: specific hook connection. :class:``air2phin.fake.core.connection.Connection`` object.
14 |     """
15 | 
16 |     def __init__(
17 |         self,
18 |         conn_name_attr: Optional[str] = None,
19 |         connection: Optional[Connection] = None,
20 |         *args,
21 |         **kwargs
22 |     ):
23 |         super().__init__(conn_name_attr, connection)
24 | 
25 |     def get_conn(self) -> "MySQLdb.connections.Connection":  # noqa: F821
26 |         """Get MySQL connection object."""
27 |         try:
28 |             import MySQLdb
29 |         except ImportError:
30 |             raise ImportError("The MySQLdb module does not seem to be installed.")
31 | 
32 |         connection = super().get_conn()
33 |         conn_args = dict(
34 |             host=connection.host,
35 |             port=connection.port,
36 |             db=connection.schema,
37 |             user=connection.login,
38 |             passwd=connection.password,
39 |         )
40 |         return MySQLdb.connect(**conn_args)
41 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/hooks/postgres.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | from air2phin.fake.core.connection import Connection
4 | from air2phin.fake.core.hook import BaseHook
5 | 
6 | 
7 | class PostgresHook(BaseHook):
8 |     """Interact with PostgreSQL.
9 | 
10 |     This hook is a fake of the Airflow Postgres hook, allowing users to keep their code unchanged while
11 |     using a dolphinscheduler datasource connection to connect to a PostgreSQL database.
12 | 
13 |     :param connection: specific hook connection. :class:``air2phin.fake.core.connection.Connection`` object.
14 |     """
15 | 
16 |     def __init__(
17 |         self,
18 |         conn_name_attr: Optional[str] = None,
19 |         connection: Optional[Connection] = None,
20 |         *args,
21 |         **kwargs
22 |     ):
23 |         super().__init__(conn_name_attr, connection)
24 | 
25 |     def get_conn(self) -> "psycopg2.extensions.connection":  # noqa: F821
26 |         """Get postgres connection object."""
27 |         try:
28 |             import psycopg2
29 |         except ImportError:
30 |             raise ImportError("The psycopg2 module does not seem to be installed.")
31 | 
32 |         connection = super().get_conn()
33 |         conn_args = dict(
34 |             host=connection.host,
35 |             port=connection.port,
36 |             dbname=connection.schema,
37 |             user=connection.login,
38 |             password=connection.password,
39 |         )
40 |         return psycopg2.connect(**conn_args)
41 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/models/__init__.py:
--------------------------------------------------------------------------------
1 | """Mock airflow.models modules."""
2 | 
3 | from air2phin.fake.models.variable import Variable  # noqa: F401
4 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/models/variable.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 | 
3 | 
4 | class Variable:
5 |     """Mock airflow.models.Variable class, so the migrator has less to change."""
6 | 
7 |     @classmethod
8 |     def get(
9 |         cls,
10 |         key: str,
11 |         default_var: Any = None,
12 |         *args,
13 |         **kwargs,
14 |     ) -> str:
15 |         """Mock airflow.models.Variable.get method, only returning the incoming parameter."""
16 |         return key if default_var is None else default_var
17 | 
18 |     @classmethod
19 |     def set(
20 |         cls,
21 |         key: str,
22 |         value: Any,
23 |         *args,
24 |         **kwargs,
25 |     ) -> None:
26 |         """Mock airflow.models.Variable.set method, do nothing."""
27 | 
28 |     @classmethod
29 |     def update(
30 |         cls,
31 |         key: str,
32 |         value: Any,
33 |         *args,
34 |         **kwargs,
35 |     ) -> None:
36 |         """Mock airflow.models.Variable.update method, do nothing."""
37 | 
38 |     @classmethod
39 |     def delete(
40 |         cls,
41 |         key: str,
42 |         *args,
43 |         **kwargs,
44 |     ) -> None:
45 |         """Mock airflow.models.Variable.delete method, do nothing."""
46 | 
--------------------------------------------------------------------------------
/src/air2phin/fake/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/fake/utils/__init__.py
--------------------------------------------------------------------------------
/src/air2phin/fake/utils/trigger_rule.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | 
3 | 
4 | class TriggerRule(str, Enum):
5 |     """Mock trigger rule."""
6 | 
7 |     ALL_SUCCESS = "all_success"
8 |     ALL_FAILED = "all_failed"
9 |     ALL_DONE = "all_done"
10 |     ONE_SUCCESS = "one_success"
11 |     ONE_FAILED = "one_failed"
12 |     ONE_DONE = "one_done"
13 |     NONE_FAILED = "none_failed"
14 |     NONE_FAILED_OR_SKIPPED = "none_failed_or_skipped"
15 |     NONE_SKIPPED = "none_skipped"
16 |     DUMMY = "dummy"
17 |     ALWAYS = "always"
18 |     NONE_FAILED_MIN_ONE_SUCCESS = "none_failed_min_one_success"
19 |     ALL_SKIPPED = "all_skipped"
20 | 
21 |     @classmethod
22 |     def is_valid(cls, trigger_rule: str) -> bool:
23 |         """Validates a trigger rule."""
24 |         return trigger_rule in cls.all_triggers()
25 | 
26 |     @classmethod
27 |     def all_triggers(cls) -> set[str]:
28 |         """Returns all trigger rules."""
29 | 
return set(cls.__members__.values()) 30 | 31 | def __str__(self) -> str: 32 | return self.value 33 | -------------------------------------------------------------------------------- /src/air2phin/rules/core/dagContext.yaml: -------------------------------------------------------------------------------- 1 | name: DAG-Context 2 | description: The configuration for migrating airflow.DAG context to pydolphinscheduler workflow context. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.DAG 8 | dest: pydolphinscheduler.core.process_definition.ProcessDefinition 9 | parameter: 10 | - action: replace 11 | src: dag_id 12 | dest: name 13 | - action: replace 14 | src: start_date 15 | dest: start_time 16 | - action: replace 17 | src: schedule_interval 18 | dest: schedule 19 | 20 | examples: 21 | assign: 22 | description: | 23 | The example of migrating `airflow.DAG` declaration with assigned. 24 | src: | 25 | from airflow import DAG 26 | from datetime import datetime 27 | 28 | dag = DAG( 29 | dag_id='dag', 30 | description='DAG description', 31 | start_date=datetime(2020, 1, 1), 32 | schedule_interval='5 4 * * *', 33 | ) 34 | dest: | 35 | from pydolphinscheduler.core.process_definition import ProcessDefinition 36 | from datetime import datetime 37 | 38 | dag = ProcessDefinition( 39 | name='dag', 40 | description='DAG description', 41 | start_time=datetime(2020, 1, 1), 42 | schedule='0 5 4 * * ? *', 43 | ) 44 | context: 45 | description: | 46 | The example of migrating `airflow.DAG` in context mode. 47 | src: | 48 | from airflow import DAG 49 | from datetime import datetime 50 | 51 | with DAG( 52 | dag_id='dag', 53 | description='DAG description', 54 | start_date=datetime(2020, 1, 1), 55 | schedule_interval='@once', 56 | ) as dag: 57 | pass 58 | dest: | 59 | from pydolphinscheduler.core.process_definition import ProcessDefinition 60 | from datetime import datetime 61 | 62 | with ProcessDefinition( 63 | name='dag', 64 | description='DAG description', 65 | start_time=datetime(2020, 1, 1), 66 | schedule='0 0 0 * * ? *', 67 | ) as dag: 68 | pass 69 | dag.submit() 70 | -------------------------------------------------------------------------------- /src/air2phin/rules/core/removeModule.yaml: -------------------------------------------------------------------------------- 1 | name: remove-module 2 | description: The example to remove qualname in DAGs 3 | 4 | migration: 5 | module: 6 | - action: remove 7 | module: 8 | - foo.Bar 9 | - foo.Bar1 10 | 11 | examples: 12 | assign: 13 | description: | 14 | The example to remove qualname in DAGs 15 | src: | 16 | from airflow import DAG 17 | from foo import Bar 18 | from foo import Bar1 19 | from datetime import datetime 20 | dest: | 21 | from pydolphinscheduler.core.process_definition import ProcessDefinition 22 | from datetime import datetime 23 | -------------------------------------------------------------------------------- /src/air2phin/rules/hooks/MySqlHook.yaml: -------------------------------------------------------------------------------- 1 | name: MySqlHook 2 | description: The configuration for migrating airflow.providers.mysql.hooks.mysql.MySqlHook context to air2phin.fake.hooks.mysql. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.providers.mysql.hooks.mysql.MySqlHook 8 | dest: air2phin.fake.hooks.mysql.MySqlHook 9 | 10 | examples: 11 | providers: 12 | description: | 13 | The example of migrating `airflow.DAG` declaration with assigned. 
14 | src: | 15 | from airflow.providers.mysql.hooks.mysql import MySqlHook 16 | hook = MySqlHook(mysql_conn_id='mysql_default') 17 | dest: | 18 | from air2phin.fake.hooks.mysql import MySqlHook 19 | hook = MySqlHook(mysql_conn_id='mysql_default') 20 | -------------------------------------------------------------------------------- /src/air2phin/rules/hooks/PostgresHook.yaml: -------------------------------------------------------------------------------- 1 | name: PostgresHook 2 | description: The configuration for migrating airflow.providers.postgres.hooks.postgres.PostgresHook to air2phin.fake.hooks.postgres. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.providers.postgres.hooks.postgres.PostgresHook 8 | dest: air2phin.fake.hooks.postgres.PostgresHook 9 | 10 | examples: 11 | providers: 12 | description: | 13 | The example of migrating `airflow.DAG` declaration with assigned. 14 | src: | 15 | from airflow.providers.postgres.hooks.postgres import PostgresHook 16 | hook = PostgresHook(postgres_conn_id='postgres_default') 17 | dest: | 18 | from air2phin.fake.hooks.postgres import PostgresHook 19 | hook = PostgresHook(postgres_conn_id='postgres_default') 20 | -------------------------------------------------------------------------------- /src/air2phin/rules/models/Variable.yaml: -------------------------------------------------------------------------------- 1 | name: Variable 2 | description: The configuration for migrating airflow.models.Variable to air2phin.fake.Variable. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: 8 | - airflow.models.Variable 9 | - airflow.models.variable.Variable 10 | dest: air2phin.fake.models.Variable 11 | 12 | examples: 13 | in-init: 14 | description: | 15 | The example of migrating `airflow.models.Variable` declaration. 16 | src: | 17 | from airflow.models import Variable 18 | var = Variable.get('var') 19 | dest: | 20 | from air2phin.fake.models import Variable 21 | var = Variable.get('var') 22 | absolute: 23 | description: | 24 | The example of migrating `airflow.models.variable.Variable` declaration. 25 | src: | 26 | from airflow.models.variable import Variable 27 | var = Variable.get('var') 28 | dest: | 29 | from air2phin.fake.models import Variable 30 | var = Variable.get('var') 31 | class-method: 32 | description: | 33 | The example of migrating `airflow.models.variable.Variable` declaration. 34 | src: | 35 | from airflow.models.variable import Variable 36 | Variable.set('var') 37 | Variable.get('var') 38 | Variable.update('var') 39 | Variable.delete('var') 40 | dest: | 41 | from air2phin.fake.models import Variable 42 | Variable.set('var') 43 | Variable.get('var') 44 | Variable.update('var') 45 | Variable.delete('var') 46 | -------------------------------------------------------------------------------- /src/air2phin/rules/operators/BashOperator.yaml: -------------------------------------------------------------------------------- 1 | name: BashOperator 2 | description: The configuration for migrating Airflow BashOperator to DolphinScheduler Shell task. 
3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: 8 | - airflow.operators.bash.BashOperator 9 | - airflow.operators.bash_operator.BashOperator 10 | dest: pydolphinscheduler.tasks.shell.Shell 11 | parameter: 12 | - action: replace 13 | src: task_id 14 | dest: name 15 | - action: replace 16 | src: bash_command 17 | dest: command 18 | 19 | examples: 20 | bash: 21 | description: The example of migrating `airflow.operators.bash.BashOperator` to `pydolphinscheduler.tasks.shell.Shell`. 22 | src: | 23 | from airflow.operators.bash import BashOperator 24 | 25 | task = BashOperator( 26 | task_id='bash', 27 | bash_command='echo "Hello World!"', 28 | ) 29 | dest: | 30 | from pydolphinscheduler.tasks.shell import Shell 31 | 32 | task = Shell( 33 | name='bash', 34 | command='echo "Hello World!"', 35 | ) 36 | bash_operator: 37 | description: The example of migrating `airflow.operators.bash_operator.BashOperator` to `pydolphinscheduler.tasks.shell.Shell`. 38 | src: | 39 | from airflow.operators.bash_operator import BashOperator 40 | 41 | task = BashOperator( 42 | task_id='bash', 43 | bash_command='echo "Hello World!"', 44 | ) 45 | dest: | 46 | from pydolphinscheduler.tasks.shell import Shell 47 | 48 | task = Shell( 49 | name='bash', 50 | command='echo "Hello World!"', 51 | ) 52 | -------------------------------------------------------------------------------- /src/air2phin/rules/operators/DummyOperator.yaml: -------------------------------------------------------------------------------- 1 | name: DummyOperator 2 | description: The configuration for migrating Airflow DummyOperator to DolphinScheduler Shell task. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.operators.dummy_operator.DummyOperator 8 | dest: pydolphinscheduler.tasks.shell.Shell 9 | parameter: 10 | - action: replace 11 | src: task_id 12 | dest: name 13 | - action: add 14 | arg: command 15 | default: 16 | type: str 17 | value: "echo 'Airflow DummyOperator'" 18 | 19 | examples: 20 | dummy: 21 | description: | 22 | The example of migrating `airflow.operators.dummy_operator.DummyOperator`, we migrate it to `pydolphinscheduler.tasks.shell.Shell` 23 | with default value :code:``echo 'Airflow DummyOperator'`` as command because DolphinScheduler do not have any task like dummy operator. 24 | src: | 25 | from airflow.operators.dummy_operator import DummyOperator 26 | 27 | task = DummyOperator( 28 | task_id='dummy', 29 | ) 30 | # TODO: We have some indentation issues when add new arguments 31 | dest: | 32 | from pydolphinscheduler.tasks.shell import Shell 33 | 34 | task = Shell( 35 | name='dummy', 36 | command="echo 'Airflow DummyOperator'", 37 | ) 38 | -------------------------------------------------------------------------------- /src/air2phin/rules/operators/PostgreOperator.yaml: -------------------------------------------------------------------------------- 1 | name: PostgresOperator 2 | description: The configuration for migrating Airflow PostgresOperator to DolphinScheduler SQL task. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.providers.postgres.operators.postgres.PostgresOperator 8 | dest: pydolphinscheduler.tasks.sql.Sql 9 | parameter: 10 | - action: replace 11 | src: task_id 12 | dest: name 13 | - action: replace 14 | src: postgres_conn_id 15 | dest: datasource_name 16 | 17 | examples: 18 | bare_sql: 19 | description: | 20 | The example of migrating `airflow.providers.postgres.operators.postgres.PostgresOperator` with 21 | bare sql statement as parameter ``sql``. 
22 | src: | 23 | from airflow.providers.postgres.operators.postgres import PostgresOperator 24 | 25 | task = PostgresOperator( 26 | task_id='postgres-sql', 27 | postgres_conn_id='postgres_default_conn', 28 | sql='select * from table', 29 | ) 30 | dest: | 31 | from pydolphinscheduler.tasks.sql import Sql 32 | 33 | task = Sql( 34 | name='postgres-sql', 35 | datasource_name='postgres_default_conn', 36 | sql='select * from table', 37 | ) 38 | sql_file: 39 | description: | 40 | The example of migrating `airflow.operators.spark_sql_operator.SparkSqlOperator` with sql file as 41 | parameter ``sql``. 42 | src: | 43 | from airflow.providers.postgres.operators.postgres import PostgresOperator 44 | 45 | task = PostgresOperator( 46 | task_id='postgres-sql', 47 | postgres_conn_id='postgres_default_conn', 48 | sql='test.sql', 49 | ) 50 | dest: | 51 | from pydolphinscheduler.tasks.sql import Sql 52 | 53 | task = Sql( 54 | name='postgres-sql', 55 | datasource_name='postgres_default_conn', 56 | sql='test.sql', 57 | ) 58 | -------------------------------------------------------------------------------- /src/air2phin/rules/operators/PythonOperator.yaml: -------------------------------------------------------------------------------- 1 | name: PythonOperator 2 | description: The configuration for migrating Airflow PythonOperator to DolphinScheduler Python task. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: 8 | - airflow.operators.python_operator.PythonOperator 9 | - airflow.operators.python.PythonOperator 10 | dest: pydolphinscheduler.tasks.python.Python 11 | parameter: 12 | - action: replace 13 | src: task_id 14 | dest: name 15 | - action: replace 16 | src: python_callable 17 | dest: definition 18 | 19 | examples: 20 | python_operator: 21 | description: The example of migrating `airflow.operators.python_operator.PythonOperator`. 22 | src: | 23 | from airflow.operators.python_operator import PythonOperator 24 | 25 | def foo(): 26 | print('Hello World!') 27 | 28 | task = PythonOperator( 29 | task_id='python', 30 | python_callable=foo, 31 | ) 32 | dest: | 33 | from pydolphinscheduler.tasks.python import Python 34 | 35 | def foo(): 36 | print('Hello World!') 37 | 38 | task = Python( 39 | name='python', 40 | definition=foo, 41 | ) 42 | nested_python: 43 | description: | 44 | The example of migrating `airflow.operators.python_operator.PythonOperator` with nested function in 45 | `python_callable`. 46 | src: | 47 | from airflow.operators.python_operator import PythonOperator 48 | 49 | def bar(): 50 | print('Hello World!') 51 | 52 | def foo(): 53 | bar() 54 | 55 | task = PythonOperator( 56 | task_id='python', 57 | python_callable=foo, 58 | ) 59 | dest: | 60 | from pydolphinscheduler.tasks.python import Python 61 | 62 | def bar(): 63 | print('Hello World!') 64 | 65 | def foo(): 66 | bar() 67 | 68 | task = Python( 69 | name='python', 70 | definition=foo, 71 | ) 72 | -------------------------------------------------------------------------------- /src/air2phin/rules/operators/SparkSqlOperator.yaml: -------------------------------------------------------------------------------- 1 | name: SparkSqlOperator 2 | description: The configuration for migrating Airflow SparkSqlOperator to DolphinScheduler SQL task. 
3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.operators.spark_sql_operator.SparkSqlOperator 8 | dest: pydolphinscheduler.tasks.sql.Sql 9 | parameter: 10 | - action: replace 11 | src: task_id 12 | dest: name 13 | - action: replace 14 | src: conn_id 15 | dest: datasource_name 16 | 17 | examples: 18 | bare_sql: 19 | description: | 20 | The example of migrating `airflow.operators.spark_sql_operator.SparkSqlOperator` with bare sql statement 21 | as parameter ``sql``. 22 | src: | 23 | from airflow.operators.spark_sql_operator import SparkSqlOperator 24 | 25 | task = SparkSqlOperator( 26 | task_id='spark-sql', 27 | conn_id='spark_default_conn', 28 | sql='select * from table', 29 | ) 30 | dest: | 31 | from pydolphinscheduler.tasks.sql import Sql 32 | 33 | task = Sql( 34 | name='spark-sql', 35 | datasource_name='spark_default_conn', 36 | sql='select * from table', 37 | ) 38 | sql_file: 39 | description: | 40 | The example of migrating `airflow.operators.spark_sql_operator.SparkSqlOperator` with sql file as 41 | parameter ``sql``. 42 | src: | 43 | from airflow.operators.spark_sql_operator import SparkSqlOperator 44 | 45 | task = SparkSqlOperator( 46 | task_id='spark-sql', 47 | conn_id='spark_default_conn', 48 | sql='test.sql', 49 | ) 50 | dest: | 51 | from pydolphinscheduler.tasks.sql import Sql 52 | 53 | task = Sql( 54 | name='spark-sql', 55 | datasource_name='spark_default_conn', 56 | sql='test.sql', 57 | ) 58 | -------------------------------------------------------------------------------- /src/air2phin/rules/utils/TriggerRule.yaml: -------------------------------------------------------------------------------- 1 | name: TriggerRule 2 | description: The configuration for migrating airflow.utils.trigger_rule.TriggerRule to air2phin.fake.utils.trigger_rule.TriggerRule. 3 | 4 | migration: 5 | module: 6 | - action: replace 7 | src: airflow.utils.trigger_rule.TriggerRule 8 | dest: air2phin.fake.utils.trigger_rule.TriggerRule 9 | 10 | examples: 11 | in-init: 12 | description: | 13 | The example of migrating `airflow.models.Variable` declaration. 14 | src: | 15 | from airflow.utils.trigger_rule import TriggerRule 16 | TriggerRule.ALL_SUCCESS 17 | dest: | 18 | from air2phin.fake.utils.trigger_rule import TriggerRule 19 | TriggerRule.ALL_SUCCESS 20 | -------------------------------------------------------------------------------- /src/air2phin/runner.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import multiprocessing 3 | from multiprocessing import Pool 4 | from pathlib import Path 5 | from timeit import default_timer as timer 6 | from typing import List, Optional 7 | 8 | import libcst as cst 9 | from tqdm import tqdm 10 | 11 | from air2phin.constants import Keyword, Token 12 | from air2phin.core.rules.config import Config 13 | from air2phin.core.transformer.route import Transformer 14 | from air2phin.utils.file import add_stem_suffix, read, write 15 | 16 | logger = logging.getLogger("air2phin.runner") 17 | 18 | 19 | class Runner: 20 | """Air2phin runner, main class to run transformer. 21 | 22 | :param config: Config of air2phin. 23 | """ 24 | 25 | def __init__(self, config: Config) -> None: 26 | self.config: Config = config 27 | 28 | def with_str(self, content: str) -> str: 29 | """Run air2phin with a string and return migrated content. 30 | 31 | :param content: Content of string you want to migrate. 
32 | """ 33 | parse_cst = cst.parse_module(content) 34 | wrapper = cst.MetadataWrapper(parse_cst) 35 | migrated = wrapper.visit(Transformer(self.config)).code 36 | return migrated 37 | 38 | def with_file(self, path: Path) -> None: 39 | """Run air2phin with a single file path and migrate to dolphinscheduler python sdk definition. 40 | 41 | Will change file inplace when ``config.inplace = True``, and create new file end with ``-air2phin`` 42 | when ``config.inplace = False``. 43 | 44 | :param path: Path of file you want to migrate. 45 | """ 46 | logger.debug("Start migrate file %s", path) 47 | start = timer() 48 | content = read(path) 49 | migrated = self.with_str(content) 50 | 51 | if self.config.inplace: 52 | write(path, migrated) 53 | else: 54 | new_path = add_stem_suffix(path, Keyword.MIGRATE_MARK) 55 | write(new_path, migrated) 56 | 57 | logger.debug("End migrate file %s, elapsed time %.5fs", path, timer() - start) 58 | 59 | def with_files(self, paths: List[Path]) -> None: 60 | """Run air2phin with multiple files to dolphinscheduler python sdk definition. 61 | 62 | :param paths: Path of file you want to migrate. 63 | """ 64 | logger.info("Start migrate files, total %d files scan.", len(paths)) 65 | logger.debug( 66 | "Start migrate files, files contain:\n%s", 67 | Token.NEW_LINE.join((f" {p}" for p in paths)), 68 | ) 69 | 70 | start = timer() 71 | for file in tqdm(paths): 72 | self.with_file(file) 73 | 74 | logger.info( 75 | f"Total migrated {len(paths)} files, spend time: %.5fs.", timer() - start 76 | ) 77 | 78 | def with_files_multiprocess( 79 | self, paths: List[Path], processes: Optional[int] = multiprocessing.cpu_count() 80 | ) -> None: 81 | """Run air2phin migrating with multiprocess. 82 | 83 | :param paths: Path of file you want to migrate. 84 | :param processes: multiprocess processes cpu count number. 85 | """ 86 | logger.info( 87 | "Start multiple processing migrate files, total %d files scan.", len(paths) 88 | ) 89 | logger.debug( 90 | "Start migrate files with processes number %d, files contain:\n%s", 91 | processes, 92 | Token.NEW_LINE.join((f" {p}" for p in paths)), 93 | ) 94 | 95 | start = timer() 96 | with Pool(processes) as pool: 97 | list(tqdm(pool.imap(self.with_file, paths), total=len(paths))) 98 | 99 | logger.debug( 100 | "All files had add to multiprocess pool, spend time %.5fs.", timer() - start 101 | ) 102 | pool.join() 103 | 104 | logger.info( 105 | f"Total migrated {len(paths)} files, spend time: %.5fs.", timer() - start 106 | ) 107 | -------------------------------------------------------------------------------- /src/air2phin/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/src/air2phin/utils/__init__.py -------------------------------------------------------------------------------- /src/air2phin/utils/file.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, Dict, List, Optional 3 | 4 | import yaml 5 | 6 | from air2phin.constants import Regexp 7 | 8 | 9 | def read(path: Path) -> str: 10 | """Read content from path. 11 | 12 | :param path: Path to read content. 13 | """ 14 | with open(path, "r") as f: 15 | return f.read() 16 | 17 | 18 | def write(path: Path, content: str) -> None: 19 | """Write content to path. 20 | 21 | The path's content will be overwritten if they are already exists. 
22 | 23 | :param path: Path to write content. 24 | :param content: Content want to write to path. 25 | """ 26 | with open(path, "w") as f: 27 | f.write(content) 28 | 29 | 30 | def read_yaml(path: Path) -> Dict[str, Any]: 31 | """Read yaml file and return a dict. 32 | 33 | :param path: Path to read content. 34 | """ 35 | assert path.is_file(), "Path must be a single file." 36 | content = read(path) 37 | return yaml.safe_load(content) 38 | 39 | 40 | def add_stem_suffix(path: Path, suf: str) -> Path: 41 | """Add stem suffix of path. 42 | 43 | This function add suffix to stem instead of suffix of path, for example: 44 | 45 | >>> add_stem_suffix(Path("foo/bar/baz.py"), "_test") 46 | Path("foo/bar/baz_test.py") 47 | 48 | :param path: Path to add suffix. 49 | :param suf: Suffix want to add to stem. 50 | """ 51 | stem, suffix = path.stem, path.suffix 52 | new_name = f"{stem}{suf}{suffix}" 53 | return path.with_name(new_name) 54 | 55 | 56 | def recurse_files( 57 | path: Path, include: Optional[str] = Regexp.PATH_ALL, exclude: Optional[str] = None 58 | ) -> List[Path]: 59 | """Recurse all match pattern files in path. 60 | 61 | :param path: file or directory path want to recurse. 62 | :param include: include match pattern in given path, default include all file in directory. 63 | :param exclude: include match pattern in given path, default None. 64 | """ 65 | if not path.exists(): 66 | raise ValueError("Path %s does not exist.", path) 67 | 68 | if path.is_file(): 69 | return [path] 70 | else: 71 | paths = set(path.rglob(include)) 72 | if exclude: 73 | paths = paths - set(path.rglob(exclude)) 74 | return [file for file in paths if file.is_file()] 75 | -------------------------------------------------------------------------------- /src/air2phin/utils/string.py: -------------------------------------------------------------------------------- 1 | from air2phin.constants import Keyword, Number, Token 2 | 3 | 4 | def convert_schedule(val: str) -> str: 5 | """Convert airflow schedule string to dolphinscheduler's. 6 | 7 | Will convert including: 8 | * crontab schedule string from ``5 4 * * *`` to ```0 5 4 * * ? *`` 9 | * shortcut schedule string like ``@daily`` to ``0 0 0 * * ? *``. 
10 | """ 11 | if ( 12 | len(val) == Number.SCHEDULE_TOTAL_NUM 13 | and val.count(Token.SPACE) == Number.SCHEDULE_SPACE_NUM 14 | ): 15 | val_list = val.split(Token.SPACE) 16 | val_list.insert(0, Token.ZERO) 17 | val_list.insert(-1, Token.QUESTION) 18 | return Token.SPACE.join(val_list) 19 | return Keyword.DEFAULT_SCHEDULE 20 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/tests/__init__.py -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/tests/cli/__init__.py -------------------------------------------------------------------------------- /tests/cli/test_command.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import pytest 4 | 5 | from air2phin.cli.command import build_argparse 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "argv, expect", 10 | [ 11 | ( 12 | ["migrate", "file"], 13 | { 14 | "subcommand": "migrate", 15 | "inplace": False, 16 | }, 17 | ), 18 | ( 19 | ["migrate", "--inplace", "file"], 20 | { 21 | "subcommand": "migrate", 22 | "inplace": True, 23 | }, 24 | ), 25 | ], 26 | ) 27 | def test_command_args(argv: List[str], expect: Dict): 28 | parser = build_argparse() 29 | args = parser.parse_args(argv) 30 | assert all(val == getattr(args, key) for key, val in expect.items()) 31 | -------------------------------------------------------------------------------- /tests/rules/EdgeCases.yaml: -------------------------------------------------------------------------------- 1 | test_cases: 2 | # --------------------------------------------- 3 | # Edge cases 4 | # --------------------------------------------- 5 | # parameter value is the keyword migration 6 | value_same_as_name: 7 | src: | 8 | from airflow.operators.bash import BashOperator 9 | 10 | bash = BashOperator( 11 | task_id='bash_command', 12 | bash_command='task_id', 13 | ) 14 | dest: | 15 | from pydolphinscheduler.tasks.shell import Shell 16 | 17 | bash = Shell( 18 | name='bash_command', 19 | command='task_id', 20 | ) 21 | # without import statement 22 | without_import_statement: 23 | src: | 24 | bash = BashOperator( 25 | task_id='bash', 26 | bash_command='echo "Hello World!"', 27 | ) 28 | dest: | 29 | bash = BashOperator( 30 | task_id='bash', 31 | bash_command='echo "Hello World!"', 32 | ) 33 | # rule default values but not end with comma in previous line 34 | add_default_not_comma_previous_line: 35 | src: | 36 | from airflow.operators.dummy_operator import DummyOperator 37 | 38 | bash = DummyOperator( 39 | task_id='dummy', 40 | abc='def' 41 | ) 42 | dest: | 43 | from pydolphinscheduler.tasks.shell import Shell 44 | 45 | bash = Shell( 46 | name='dummy', 47 | abc='def', 48 | command="echo 'Airflow DummyOperator'" 49 | ) 50 | # have with itme but not airflow.DAG 51 | have_not_dag_with_item: 52 | src: | 53 | from contextlib import closing 54 | from airflow.operators.python import PythonOperator 55 | from airflow.providers.postgres.hooks.postgres import PostgresHook 56 | 57 | def demo(): 58 | connection = PostgresHook.get_connection('postgres_default') 59 | hook = PostgresHook(connection=connection) 60 | with 
closing(hook.get_conn()) as conn: 61 | with closing(conn.cursor()) as cursor: 62 | cursor.execute('SELECT 1') 63 | print(cursor.fetchall()) 64 | 65 | demo = PythonOperator( 66 | task_id='demo', 67 | python_callable=demo, 68 | ) 69 | dest: | 70 | from contextlib import closing 71 | from pydolphinscheduler.tasks.python import Python 72 | from air2phin.fake.hooks.postgres import PostgresHook 73 | 74 | def demo(): 75 | connection = PostgresHook.get_connection('postgres_default') 76 | hook = PostgresHook(connection=connection) 77 | with closing(hook.get_conn()) as conn: 78 | with closing(conn.cursor()) as cursor: 79 | cursor.execute('SELECT 1') 80 | print(cursor.fetchall()) 81 | 82 | demo = Python( 83 | name='demo', 84 | definition=demo, 85 | ) 86 | -------------------------------------------------------------------------------- /tests/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhaleOps/air2phin/33d9247fb219ed12c24dfcf9063cf00504f73ca0/tests/rules/__init__.py -------------------------------------------------------------------------------- /tests/rules/test_example.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | import pytest 5 | 6 | from air2phin.constants import ConfigKey 7 | from air2phin.core.rules.config import Config 8 | from air2phin.core.rules.loader import build_in_rules 9 | from air2phin.runner import Runner 10 | from air2phin.utils.file import read_yaml 11 | 12 | rules_dir = Path(__file__).parent 13 | test_cases_rules: List[Path] = list(rules_dir.glob("*.yaml")) 14 | 15 | FLAG_TEST_CASE = "test_cases" 16 | 17 | 18 | @pytest.mark.parametrize("rule_ex", build_in_rules()) 19 | def test_rules_example(rule_ex: Path) -> None: 20 | runner = Runner(Config()) 21 | contents = read_yaml(rule_ex) 22 | cases = contents.get(ConfigKey.EXAMPLE) 23 | for name, case in cases.items(): 24 | src = case.get(ConfigKey.SOURCE) 25 | dest = case.get(ConfigKey.DESTINATION) 26 | assert ( 27 | runner.with_str(src) == dest 28 | ), f"Migrate test case {rule_ex.stem}.{name} failed." 29 | 30 | 31 | @pytest.mark.parametrize("rule_ex", test_cases_rules) 32 | def test_test_rules_example(rule_ex: Path) -> None: 33 | runner = Runner(Config()) 34 | contents = read_yaml(rule_ex) 35 | cases = contents.get(FLAG_TEST_CASE) 36 | for name, case in cases.items(): 37 | src = case.get(ConfigKey.SOURCE) 38 | dest = case.get(ConfigKey.DESTINATION) 39 | assert dest == runner.with_str( 40 | src 41 | ), f"Migrate test case {rule_ex.stem}.{name} failed." 
42 | -------------------------------------------------------------------------------- /tests/rules/test_rules.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from air2phin.constants import ConfigKey 4 | from air2phin.core.rules.config import Config 5 | from air2phin.core.rules.loader import path_rule 6 | from air2phin.utils.file import read_yaml 7 | 8 | ROOT_MUST_HAVE_ATTR = ["name", "description", "migration", "examples"] 9 | EXAMPLES_MUST_HAVE_ATTR = ["description", "src", "dest"] 10 | 11 | all_rules = [path for path in path_rule.glob("**/*") if path.is_file()] 12 | 13 | 14 | def test_suffix() -> None: 15 | for rule in all_rules: 16 | assert rule.suffix == ".yaml", f"Rule file {rule} must have suffix .yaml" 17 | 18 | 19 | def test_file_must_have_attr() -> None: 20 | for rule in all_rules: 21 | content = read_yaml(rule) 22 | for attr in ROOT_MUST_HAVE_ATTR: 23 | assert attr in content, f"Rule file {rule} must have attribute {attr}" 24 | 25 | 26 | def module_add_rm(action: Dict[str, Any]) -> bool: 27 | return ConfigKey.MODULE in action 28 | 29 | 30 | def test_module_action_type() -> None: 31 | for rule in all_rules: 32 | content = read_yaml(rule) 33 | migration = content["migration"] 34 | 35 | # will raise error if more than one action in :func:``get_module_action`` 36 | Config.get_module_action(migration, ConfigKey.KW_REPLACE) 37 | 38 | add = Config.get_module_action(migration, ConfigKey.KW_ADD) 39 | if add: 40 | assert isinstance( 41 | add[ConfigKey.MODULE], (str, list) 42 | ), f"Rule file {rule} `add` action value must with type str or list." 43 | 44 | remove = Config.get_module_action(migration, ConfigKey.KW_REMOVE) 45 | if remove: 46 | assert isinstance( 47 | remove[ConfigKey.MODULE], (str, list) 48 | ), f"Rule file {rule} `remove` action value must with type str or list." 49 | 50 | 51 | def test_module_action_attr() -> None: 52 | for rule in all_rules: 53 | content = read_yaml(rule) 54 | actions = content["migration"]["module"] 55 | for action in actions: 56 | assert ( 57 | ConfigKey.ACTION in action 58 | ), "Rule {rule} module each item must have attr action." 59 | if action[ConfigKey.ACTION] in {ConfigKey.KW_REMOVE, ConfigKey.KW_ADD}: 60 | assert module_add_rm( 61 | action 62 | ), "Rule {rule} module action `remove` or `add` do not have must exits attr." 63 | elif action[ConfigKey.ACTION] == ConfigKey.KW_REPLACE: 64 | assert action_replace( 65 | action 66 | ), "Rule {rule} parameter action `replace` do not have must exits attr." 67 | else: 68 | raise ValueError( 69 | "Rule {rule} parameter action must with specific value." 70 | ) 71 | 72 | 73 | def test_module_action_replace() -> None: 74 | for rule in all_rules: 75 | content = read_yaml(rule) 76 | migration = content["migration"] 77 | if "module" in migration: 78 | replace = Config.get_module_action(migration, ConfigKey.KW_REPLACE) 79 | remove = Config.get_module_action(migration, ConfigKey.KW_REMOVE) 80 | if remove is None: 81 | assert ( 82 | "src" in replace 83 | ), f"Rule file {rule} migration.module pair key `src` not exists." 84 | assert ( 85 | "dest" in replace 86 | ), f"Rule file {rule} migration.module pair key `dest` not exists." 
87 | 88 | 89 | def test_module_action_replace_src_list_or_str() -> None: 90 | for rule in all_rules: 91 | content = read_yaml(rule) 92 | migration = content["migration"] 93 | if "module" in migration: 94 | replace = Config.get_module_action(migration, ConfigKey.KW_REPLACE) 95 | remove = Config.get_module_action(migration, ConfigKey.KW_REMOVE) 96 | if remove is None: 97 | src = replace["src"] 98 | assert isinstance( 99 | src, (list, str) 100 | ), f"Rule file {rule} migration.module.src must be list or str." 101 | 102 | 103 | def test_module_action_replace_src_duplicate() -> None: 104 | exists = set() 105 | for rule in all_rules: 106 | content = read_yaml(rule) 107 | migration = content["migration"] 108 | if "module" in migration: 109 | replace = Config.get_module_action(migration, ConfigKey.KW_REPLACE) 110 | remove = Config.get_module_action(migration, ConfigKey.KW_REMOVE) 111 | if remove is None: 112 | src = replace["src"] 113 | if isinstance(src, list): 114 | for s in src: 115 | assert ( 116 | s not in exists 117 | ), f"Rule file {rule} migration.module.src {s} duplicate." 118 | exists.add(s) 119 | elif isinstance(src, str): 120 | assert ( 121 | src not in exists 122 | ), f"Rule file {rule} migration.module.src {src} duplicate." 123 | exists.add(src) 124 | 125 | 126 | def test_example_must_attr() -> None: 127 | for rule in all_rules: 128 | content = read_yaml(rule) 129 | examples = content["examples"] 130 | for key in examples: 131 | example = examples[key] 132 | assert all( 133 | attr in example for attr in EXAMPLES_MUST_HAVE_ATTR 134 | ), f"Rule file {rule} examples missing must have attribute {EXAMPLES_MUST_HAVE_ATTR}" 135 | 136 | 137 | def test_param_action_type() -> None: 138 | for rule in all_rules: 139 | content = read_yaml(rule) 140 | parameter = content["migration"].get("parameter", []) 141 | for params in parameter: 142 | assert ( 143 | ConfigKey.ACTION in params 144 | ), "Rule {rule} all parameter must have attr action." 145 | if params[ConfigKey.ACTION] == ConfigKey.KW_ADD: 146 | assert param_action_add( 147 | params 148 | ), "Rule {rule} parameter action `add` do not have must exits attr." 149 | elif params[ConfigKey.ACTION] == ConfigKey.KW_REMOVE: 150 | assert param_action_remove( 151 | params 152 | ), "Rule {rule} parameter action `remove` do not have must exits attr." 153 | elif params[ConfigKey.ACTION] == ConfigKey.KW_REPLACE: 154 | assert action_replace( 155 | params 156 | ), "Rule {rule} parameter action `replace` do not have must exits attr." 157 | else: 158 | raise ValueError( 159 | "Rule {rule} parameter action must with specific value." 160 | ) 161 | 162 | 163 | def action_replace(param: Dict[str, Any]) -> bool: 164 | return ConfigKey.SOURCE in param and ConfigKey.DESTINATION in param 165 | 166 | 167 | def param_action_add(param: Dict[str, Any]) -> bool: 168 | return ( 169 | ConfigKey.ARGUMENT in param 170 | and ConfigKey.DEFAULT in param 171 | and ConfigKey.TYPE in param[ConfigKey.DEFAULT] 172 | and ConfigKey.VALUE in param[ConfigKey.DEFAULT] 173 | ) 174 | 175 | 176 | def param_action_remove(param: Dict[str, Any]) -> bool: 177 | return ConfigKey.ARGUMENT in param 178 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [tox] 19 | envlist = auto-lint, lint, code-test, local-ci, doc-build, py{37,38,39,310,311,312} 20 | 21 | [testenv] 22 | allowlist_externals = 23 | make 24 | git 25 | 26 | [testenv:auto-lint] 27 | extras = dev 28 | commands = 29 | python -m isort . 30 | python -m black . 31 | python -m autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports --recursive . 32 | 33 | [testenv:lint] 34 | extras = dev 35 | commands = 36 | python -m isort --check . 37 | python -m black --check . 38 | python -m flake8 39 | python -m autoflake --remove-all-unused-imports --ignore-init-module-imports --check --recursive . 40 | 41 | [testenv:doc-build] 42 | extras = dev 43 | commands = 44 | make -C {toxinidir}/docs clean 45 | make -C {toxinidir}/docs html 46 | 47 | [testenv:code-test] 48 | extras = dev 49 | # Run both tests and coverage 50 | commands = 51 | python -m pytest --cov=air2phin --cov-config={toxinidir}/.coveragerc --cov-report term --cov-report xml:coverage.xml tests/ 52 | 53 | [testenv:local-ci] 54 | extras = dev 55 | commands = 56 | {[testenv:lint]commands} 57 | {[testenv:code-test]commands} 58 | {[testenv:doc-build]commands} 59 | --------------------------------------------------------------------------------
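As a closing note, the migration entry point defined in src/air2phin/runner.py and exercised by tests/rules/test_example.py can also be driven programmatically; a minimal sketch using the built-in rules, where the Airflow snippet is only an illustrative input:

    from air2phin.core.rules.config import Config
    from air2phin.runner import Runner

    # Build a runner with the default (built-in) migration rules.
    runner = Runner(Config())
    migrated = runner.with_str(
        "from airflow.operators.bash import BashOperator\n"
        "task = BashOperator(task_id='bash', bash_command='echo 1')\n"
    )
    print(migrated)  # prints the dolphinscheduler-sdk-python version of the snippet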