├── tests ├── compatibility │ ├── __init__.py │ └── test_retro_compatibility.py ├── unit │ ├── __init__.py │ ├── hooks │ │ ├── __init__.py │ │ ├── test_carte.py │ │ └── test_kettle.py │ └── operators │ │ ├── __init__.py │ │ ├── test_pan.py │ │ ├── test_kitchen.py │ │ ├── test_carte_job.py │ │ └── test_carte_trans.py ├── integration │ ├── __init__.py │ ├── hooks │ │ ├── __init__.py │ │ └── test_carte.py │ └── operators │ │ ├── __init__.py │ │ ├── test_kitchen.py │ │ ├── test_carte_job.py │ │ ├── test_pan.py │ │ └── test_carte_trans.py ├── __init__.py ├── operator_test_base.py └── assets │ ├── test_job.kjb │ └── test_trans.ktr ├── requirements.txt ├── scripts └── test.sh ├── airflow_pentaho ├── operators │ ├── PanOperator.py │ ├── CarteJobOperator.py │ ├── KitchenOperator.py │ ├── CarteTransOperator.py │ ├── __init__.py │ ├── carte.py │ └── kettle.py ├── __init__.py ├── hooks │ ├── __init__.py │ ├── kettle.py │ └── carte.py └── plugin.py ├── .pre-commit-config.yaml ├── .github └── workflows │ └── python-publish.yml ├── .travis.yml ├── CHANGELOG.md ├── setup.py ├── sample_dags └── pdi_flow.py ├── .gitignore ├── README.md ├── LICENSE.txt └── pylintrc /tests/compatibility/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | xmltodict 2 | setuptools-git-version 3 | pytest 4 | pytest-cov 5 | twine 6 | toml -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm .coverage || echo "No previous coverage files found" 3 | pytest --cov=airflow_pentaho --cov-report xml tests --ignore=tests/integration 4 | -------------------------------------------------------------------------------- 
/airflow_pentaho/operators/PanOperator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=invalid-name 3 | """This module is deprecated. Please use `airflow_pentaho.operators.kettle`.""" 4 | 5 | import warnings 6 | 7 | # pylint: disable=unused-import 8 | from airflow_pentaho.operators.kettle import PanOperator # noqa 9 | 10 | warnings.warn( 11 | 'This module is deprecated. Please use `airflow_pentaho.operators.kettle`.', 12 | DeprecationWarning, 13 | stacklevel=2) 14 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/CarteJobOperator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=invalid-name 3 | """This module is deprecated. Please use `airflow_pentaho.operators.carte`.""" 4 | 5 | import warnings 6 | 7 | # pylint: disable=unused-import 8 | from airflow_pentaho.operators.carte import CarteJobOperator # noqa 9 | 10 | warnings.warn( 11 | 'This module is deprecated. Please use `airflow_pentaho.operators.carte`.', 12 | DeprecationWarning, 13 | stacklevel=2) 14 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/KitchenOperator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=invalid-name 3 | """This module is deprecated. Please use `airflow_pentaho.operators.kettle`.""" 4 | 5 | import warnings 6 | 7 | # pylint: disable=unused-import 8 | from airflow_pentaho.operators.kettle import KitchenOperator # noqa 9 | 10 | warnings.warn( 11 | 'This module is deprecated. 
Please use `airflow_pentaho.operators.kettle`.', 12 | DeprecationWarning, 13 | stacklevel=2) 14 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/CarteTransOperator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=invalid-name 3 | """This module is deprecated. Please use `airflow_pentaho.operators.carte`.""" 4 | 5 | import warnings 6 | 7 | # pylint: disable=unused-import 8 | from airflow_pentaho.operators.carte import CarteTransOperator # noqa 9 | 10 | warnings.warn( 11 | 'This module is deprecated. Please use `airflow_pentaho.operators.carte`.', 12 | DeprecationWarning, 13 | stacklevel=2) 14 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /airflow_pentaho/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /tests/unit/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /airflow_pentaho/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /tests/unit/operators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /tests/integration/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /tests/integration/operators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | from unittest import TestCase 18 | 19 | import tests 20 | 21 | 22 | class TestBase(TestCase): 23 | 24 | TESTS_PATH = os.path.dirname(os.path.abspath(tests.__file__)) 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: check-ast 7 | - id: check-case-conflict 8 | - id: check-docstring-first 9 | - id: check-merge-conflict 10 | - id: check-symlinks 11 | - id: check-yaml 12 | - id: debug-statements 13 | - id: destroyed-symlinks 14 | - id: double-quote-string-fixer 15 | - id: end-of-file-fixer 16 | - id: fix-encoding-pragma 17 | - id: double-quote-string-fixer 18 | - id: mixed-line-ending 19 | - id: trailing-whitespace 20 | - repo: https://github.com/pre-commit/mirrors-pylint 21 | rev: 'v2.6.0' 22 | hooks: 23 | - id: pylint 24 | - repo: local 25 | hooks: 26 | - id: test 27 | name: Testing 28 | pass_filenames: false 29 | language: system 30 | 
entry: pytest --cov=airflow_pentaho tests/unit 31 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.8' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install -r requirements.txt 33 | pip install build 34 | - name: Build package 35 | run: python -m build 36 | - name: Publish package 37 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_API_TOKEN }} 41 | -------------------------------------------------------------------------------- /airflow_pentaho/plugin.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Plugin""" 16 | 17 | 18 | from airflow.plugins_manager import AirflowPlugin 19 | from airflow_pentaho.hooks.carte import PentahoCarteHook 20 | from airflow_pentaho.hooks.kettle import PentahoHook 21 | from airflow_pentaho.operators.carte import CarteJobOperator 22 | from airflow_pentaho.operators.carte import CarteTransOperator 23 | from airflow_pentaho.operators.kettle import KitchenOperator 24 | from airflow_pentaho.operators.kettle import PanOperator 25 | 26 | 27 | class PentahoPlugin(AirflowPlugin): 28 | name = 'airflow_pentaho' 29 | operators = [KitchenOperator, PanOperator, 30 | CarteJobOperator, CarteTransOperator] 31 | hooks = [PentahoHook, PentahoCarteHook] 32 | -------------------------------------------------------------------------------- /tests/unit/operators/test_pan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from unittest.mock import MagicMock 18 | 19 | from airflow_pentaho.operators.kettle import PanOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class TestPanOperator(OperatorTestBase): 24 | """Testing Pan Operator""" 25 | 26 | def test_return_value(self): 27 | op = PanOperator( 28 | task_id='test_mocked_pan_operator', 29 | xcom_push=False, 30 | directory='/home', 31 | trans='test_trans', 32 | params={'a': '1'}) 33 | 34 | mocked_cli = MagicMock() 35 | mocked_cli.build_command.return_value = \ 36 | """echo This is a mocked result""" 37 | # pylint: disable=protected-access 38 | op._get_pentaho_client = MagicMock(return_value=mocked_cli) 39 | 40 | return_value = op.execute(context={}) 41 | self.assertIsNone(return_value) 42 | -------------------------------------------------------------------------------- /tests/unit/operators/test_kitchen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from unittest.mock import MagicMock 18 | 19 | from airflow_pentaho.operators.kettle import KitchenOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class TestKitchenOperator(OperatorTestBase): 24 | """Test Kitchen Operator""" 25 | 26 | def test_return_value(self): 27 | op = KitchenOperator( 28 | task_id='test_kitchen_operator', 29 | xcom_push=False, 30 | directory='/home', 31 | job='test_job', 32 | params={'a': '1'}) 33 | 34 | mocked_cli = MagicMock() 35 | mocked_cli.build_command.return_value = \ 36 | """echo This is a mocked result""" 37 | # pylint: disable=protected-access 38 | op._get_pentaho_client = MagicMock(return_value=mocked_cli) 39 | 40 | return_value = op.execute(context={}) 41 | self.assertIsNone(return_value) 42 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | dist: focal 4 | 5 | python: 6 | - "3.8" 7 | - "3.9" 8 | # - "3.6" 9 | # - "3.7" 10 | 11 | install: 12 | - export AIRFLOW_VERSION=2.4.0 13 | - export PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" 14 | - export CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt" 15 | - pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" 16 | - pip install -r requirements.txt 17 | - pip install pre-commit 18 | - airflow db init 19 | 20 | script: 21 | - pre-commit run --all-files 22 | - pip install . 
# Check package setup 23 | 24 | jobs: 25 | include: 26 | - stage: deploy 27 | python: 3.8 28 | deploy: 29 | provider: pypi 30 | user: "__token__" 31 | password: 32 | secure: VkXM0rArpus0FXeCaebUiVSNhn5DvPa1vE8cvubyJb0OX83PFPLitiJhPKncCFx071hhjRiPRGeU4WCBl7p/aVjVv6tTrb32jFQl8MbV+0GcfSu35Mca4VSyFNzWmpY1JDonJ/RN2k/Sj1ipkFts0iQ//MBERp7lmNBul9eTXQg9Fdc2QAOj/SXYiBCp/R65sh1jXwT/ylCTKafGbIx6XAcJQVYt6rbxBCUOf6QQnasrW9E+yQMl3Q1uNqPUNWZPJNqhWehxubtkquDg/+CAk5Mmm+o1NSVElF0wKyxYd/55vbSRkCku3b87tYOiwpl1RxwJ/Mc9EiOmaqOSdY/JKY9osu9B15ji5IzOmhpt8AHc0iTgDrT3p6o/nTviVlmuZdzJTS3q3q6vcz2JFe2VpFUb2/8I+4Ay5lC7+jzF/XNg4QIiCAuxEwH2ZtC5wwt8uYWi6YCiB1UYOdvIdos3KgZHyTW/QRjLT9sJ7DlQP69UZ9yOvLLFqzrkc3PlGYlzK9nqVzucRI06vJpHRknzv9XHJUUovvB+7TI+nU3re7Gkyh/n1+LZH2+g1DLbxj+pV5WV62EYyaiMiN/ROmN0lVC1drI4LcZ9po6RWiUDnrirVOkUMm3cxrUImy46FIc0uxDNkAI0URZP5gBTYYEfEMw8nBDwONTAvTCzbW50zaw= 33 | on: 34 | branch: master 35 | tags: true 36 | 37 | after_success: 38 | - bash <(curl -s https://codecov.io/bash) 39 | -------------------------------------------------------------------------------- /tests/integration/operators/test_kitchen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from airflow_pentaho.operators.kettle import KitchenOperator 18 | from tests.operator_test_base import OperatorTestBase 19 | 20 | 21 | class TestKitchenOperator(OperatorTestBase): 22 | """Test Kitchen Operator""" 23 | 24 | def test_return_value(self): 25 | op = KitchenOperator( 26 | task_id='test_kitchen_operator', 27 | xcom_push=True, 28 | directory='/home/test', 29 | job='test_job', 30 | params={'a': '1'}) 31 | 32 | return_value = op.execute(context={}) 33 | self.assertTrue('Kitchen - Processing ended' in return_value) 34 | 35 | def test_return_value_file(self): 36 | op = KitchenOperator( 37 | task_id='test_kitchen_operator', 38 | xcom_push=True, 39 | file=self.TESTS_PATH + '/assets/test_job.kjb', 40 | job='test_job', 41 | params={'a': '1'}) 42 | 43 | return_value = op.execute(context={}) 44 | self.assertTrue('Kitchen - Processing ended' in return_value) 45 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## airflow-pentaho-plugin v1.0.8 - 2021-05-18 4 | 5 | - Fixes Log Level Problem [#9](https://github.com/damavis/airflow-pentaho-plugin/issues/9) 6 | 7 | ## airflow-pentaho-plugin v1.0.7 - 2021-04-29 8 | 9 | - Advanced statuses handling. (by [@IM-SIM](https://github.com/IM-SIM)) 10 | 11 | ## airflow-pentaho-plugin v1.0.6 - 2021-04-16 12 | 13 | - Backported to Airflow version 1.10.x. 14 | - Parameters format for kettle Operators fixed. 15 | - Testing on pre-commit hook added. 16 | - xmldtodict bumped from 0.10.0 to 0.12.0. 17 | - Pypi classifiers fixed. 18 | - Style fixes. 19 | - Add CI/CD testing for python 3.6, 3.7, 3.8 and 3.9. 20 | - Pinning version of SQLAchemy<1.4,>=1.3.18. 
21 | 22 | ## airflow-pentaho-plugin v1.0.1 - 2021-02-17 23 | 24 | ### Deprecations 25 | 26 | - Operators have been reorganized into 2 modules, and the old modules had been 27 | marked as deprecated and will be removed in the future. 28 | 29 | - Kettle 30 | - `airflow_pentaho.hooks.PentahoHook -> airflow_pentaho.hooks.kettle` 31 | - `airflow_pentaho.operators.PDIBaseOperator -> airflow_pentaho.operators.kettle` 32 | - `airflow_pentaho.operators.KitchenOperator -> airflow_pentaho.operators.kettle` 33 | - `airflow_pentaho.operators.PanOperator -> airflow_pentaho.operators.kettle` 34 | - Carte 35 | - `airflow_pentaho.hooks.PentahoCarteHook -> airflow_pentaho.hooks.carte` 36 | - `airflow_pentaho.operators.CarteBaseOperator -> airflow_pentaho.operators.carte` 37 | - `airflow_pentaho.operators.CarteJobOperator -> airflow_pentaho.operators.carte` 38 | - `airflow_pentaho.operators.CarteTransOperator -> airflow_pentaho.operators.carte` 39 | 40 | ### Fixes 41 | 42 | - Allow users to choose http:// or https:// for Carte host. 43 | - Other minor fixes. 44 | -------------------------------------------------------------------------------- /tests/integration/operators/test_carte_job.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from airflow import AirflowException 18 | 19 | from airflow_pentaho.operators.carte import CarteJobOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class TestCarteJobOperator(OperatorTestBase): 24 | """Test Carte Job Operator""" 25 | 26 | def test_execute(self): 27 | op = CarteJobOperator( 28 | task_id='test_carte_job_operator', 29 | job='/home/bi/test_job', 30 | level='Debug') 31 | 32 | try: 33 | op.execute(context={}) 34 | except Exception as ex: 35 | raise ex 36 | 37 | def test_execute_non_existent_job(self): 38 | op = CarteJobOperator( 39 | task_id='test_carte_job_operator', 40 | job='/home/bi/unknown_job', 41 | level='Debug') 42 | 43 | with self.assertRaises(AirflowException) as context: 44 | op.execute(context={}) 45 | 46 | print(context.exception) 47 | self.assertTrue('ERROR: Unable to find job [unknown_job]' 48 | in str(context.exception)) 49 | -------------------------------------------------------------------------------- /tests/integration/operators/test_pan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import unittest 18 | 19 | from airflow_pentaho.operators.kettle import PanOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class TestPanOperator(OperatorTestBase): 24 | """Testing Pan Operator""" 25 | 26 | def test_return_value(self): 27 | op = PanOperator( 28 | task_id='test_pan_operator', 29 | xcom_push=True, 30 | directory='/home/test', 31 | trans='test_trans', 32 | params={'a': '1'}) 33 | 34 | return_value = op.execute(context={}) 35 | self.assertTrue('ended successfully' in return_value) 36 | 37 | @unittest.expectedFailure # Transformation XML is not valid, error 38 | def test_return_value_file(self): 39 | op = PanOperator( 40 | task_id='test_pan_operator', 41 | xcom_push=True, 42 | file=self.TESTS_PATH + '/assets/test_trans.kjb', 43 | trans='test_trans', 44 | safemode=True, 45 | params={'a': '1'}) 46 | 47 | return_value = op.execute(context={}) 48 | self.assertTrue('ended successfully' in return_value) 49 | -------------------------------------------------------------------------------- /tests/integration/operators/test_carte_trans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from airflow import AirflowException 18 | 19 | from airflow_pentaho.operators.carte import CarteTransOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class TestCarteTransOperator(OperatorTestBase): 24 | """Test Carte Transformation Operator""" 25 | 26 | def test_execute(self): 27 | op = CarteTransOperator( 28 | task_id='test_carte_trans_operator', 29 | trans='/home/bi/test_trans', 30 | level='Debug') 31 | 32 | op.execute(context={}) 33 | try: 34 | op.execute(context={}) 35 | except Exception as ex: 36 | raise ex 37 | 38 | def test_execute_non_existent_job(self): 39 | op = CarteTransOperator( 40 | task_id='test_carte_trans_operator', 41 | trans='/home/bi/unknown_trans', 42 | level='Debug') 43 | 44 | with self.assertRaises(AirflowException) as context: 45 | op.execute(context={}) 46 | 47 | print(context.exception) 48 | self.assertTrue('ERROR: Unable to find job [unknown_job]' 49 | in str(context.exception)) 50 | -------------------------------------------------------------------------------- /tests/operator_test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from airflow import settings 18 | from airflow.models import Connection 19 | 20 | from tests import TestBase 21 | 22 | 23 | class OperatorTestBase(TestBase): 24 | """Operator Test Base""" 25 | 26 | conn_id = None 27 | 28 | @classmethod 29 | def setUpClass(cls): 30 | super().setUpClass() 31 | conn = Connection(conn_id='pdi_default') 32 | 33 | extra = """ 34 | { 35 | "rep": "Default", 36 | "pentaho_home": "/opt/pentaho", 37 | "carte_username": "cluster", 38 | "carte_password": "cluster" 39 | } 40 | """ 41 | 42 | session = settings.Session() 43 | 44 | try: 45 | if not conn.login: 46 | conn = Connection( 47 | conn_type='pentaho', 48 | conn_id='pdi_default', 49 | host='localhost', 50 | port=8880, 51 | login='admin', 52 | password='password', 53 | extra=extra 54 | ) 55 | session.add(conn) 56 | session.commit() 57 | except Exception as ex: # pylint: disable=broad-except 58 | print(ex) 59 | session.rollback() 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Setup""" 16 | 17 | 18 | from setuptools import find_packages, setup 19 | 20 | from os import path 21 | this_directory = path.abspath(path.dirname(__file__)) 22 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 23 | long_description = f.read() 24 | 25 | setup( 26 | name='airflow-pentaho-plugin', 27 | version='1.2.3', 28 | license='Apache 2.0', 29 | author='Damavis', 30 | author_email='info@damavis.com', 31 | long_description=long_description, 32 | long_description_content_type='text/markdown', 33 | url='https://github.com/damavis/airflow-pentaho-plugin', 34 | python_requires='>=3', 35 | test_suite='nose.collector', 36 | zip_safe=False, 37 | include_package_data=True, 38 | packages=find_packages('.', exclude=['tests', 'tests.*']), 39 | classifiers=[ 40 | 'Environment :: Plugins', 41 | 'License :: OSI Approved :: Apache Software License', 42 | 'Operating System :: Unix', 43 | 'Operating System :: MacOS', 44 | 'Operating System :: Microsoft :: Windows', 45 | 'Programming Language :: Python :: 3.6', 46 | 'Programming Language :: Python :: 3.7', 47 | 'Programming Language :: Python :: 3.8', 48 | 'Programming Language :: Python :: 3.9' 49 | ], 50 | install_requires=[ 51 | 'xmltodict >= 0.12.0', 52 | ], 53 | entry_points={ 54 | 'airflow.plugins': [ 55 | 'airflow_pentaho = airflow_pentaho.plugin:PentahoPlugin' 56 | ] 57 | } 58 | ) 59 | -------------------------------------------------------------------------------- /tests/unit/hooks/test_carte.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from unittest import TestCase 18 | 19 | from airflow_pentaho.hooks.carte import PentahoCarteHook 20 | 21 | DEFAULT_HOST = 'https://localhost' 22 | DEFAULT_PORT = 8880 23 | DEFAULT_REP = 'DEFAULT' 24 | DEFAULT_CARTE_USERNAME = 'cluster' 25 | DEFAULT_CARTE_PASSWORD = 'cluster' 26 | DEFAULT_REP_USERNAME = 'admin' 27 | DEFAULT_REP_PASSWORD = 'password' 28 | 29 | 30 | class TestPentahoCarteClient(TestCase): 31 | """Test Carte Client""" 32 | 33 | def test_cli_constructor(self): 34 | cli = PentahoCarteHook.PentahoCarteClient(DEFAULT_HOST, 35 | DEFAULT_PORT, 36 | DEFAULT_REP, 37 | DEFAULT_REP_USERNAME, 38 | DEFAULT_REP_PASSWORD, 39 | DEFAULT_CARTE_USERNAME, 40 | DEFAULT_CARTE_PASSWORD, 41 | level='Basic') 42 | self.assertEqual(cli.host, DEFAULT_HOST) 43 | self.assertEqual(cli.port, DEFAULT_PORT) 44 | self.assertEqual(cli.rep, DEFAULT_REP) 45 | self.assertEqual(cli.username, DEFAULT_REP_USERNAME) 46 | self.assertEqual(cli.password, DEFAULT_REP_PASSWORD) 47 | self.assertEqual(cli.carte_username, DEFAULT_CARTE_USERNAME) 48 | self.assertEqual(cli.carte_password, DEFAULT_CARTE_PASSWORD) 49 | self.assertEqual(cli.host, DEFAULT_HOST) 50 | self.assertEqual(cli.level, 'Basic') 51 | -------------------------------------------------------------------------------- /tests/compatibility/test_retro_compatibility.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Import exposed operators to ensure retro compatibility""" 16 | 17 | from airflow_pentaho.operators.KitchenOperator import KitchenOperator 18 | from airflow_pentaho.operators.PanOperator import PanOperator 19 | from airflow_pentaho.operators.CarteJobOperator import CarteJobOperator 20 | from airflow_pentaho.operators.CarteTransOperator import CarteTransOperator 21 | 22 | from tests.operator_test_base import OperatorTestBase 23 | 24 | 25 | class TestCompatibility(OperatorTestBase): 26 | """ 27 | Testing deprecated operators to keep compatibility. 28 | Will be removed in version 2.x. 
29 | """ 30 | 31 | def test_execute(self): 32 | op1 = KitchenOperator(task_id='test_kitchen_operator', 33 | xcom_push=True, 34 | directory='/home', 35 | job='test_job', 36 | params={'a': '1'}) 37 | self.assertIsNotNone(op1) 38 | 39 | op2 = PanOperator(task_id='test_mocked_pan_operator', 40 | xcom_push=True, 41 | directory='/home', 42 | trans='test_trans', 43 | params={'a': '1'}) 44 | self.assertIsNotNone(op2) 45 | 46 | op3 = CarteJobOperator(task_id='test_carte_job_operator', 47 | job='/home/bi/test_job', 48 | level='Debug') 49 | self.assertIsNotNone(op3) 50 | 51 | op4 = CarteTransOperator(task_id='test_carte_trans_operator', 52 | trans='/home/bi/test_trans', 53 | level='Debug') 54 | self.assertIsNotNone(op4) 55 | -------------------------------------------------------------------------------- /sample_dags/pdi_flow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Example usage""" 16 | 17 | 18 | from datetime import timedelta 19 | 20 | from airflow import DAG 21 | from airflow.utils.dates import days_ago 22 | 23 | from airflow_pentaho.operators.kettle import KitchenOperator 24 | from airflow_pentaho.operators.kettle import PanOperator 25 | from airflow_pentaho.operators.carte import CarteJobOperator 26 | from airflow_pentaho.operators.carte import CarteTransOperator 27 | 28 | DAG_NAME = 'pdi_flow' 29 | DEFAULT_ARGS = { 30 | 'owner': 'Airflow', 31 | 'depends_on_past': False, 32 | 'start_date': days_ago(2), 33 | 'email': ['airflow@example.com'], 34 | 'retries': 3, 35 | 'retry_delay': timedelta(minutes=10), 36 | 'email_on_failure': False, 37 | 'email_on_retry': False 38 | } 39 | 40 | with DAG(dag_id=DAG_NAME, 41 | default_args=DEFAULT_ARGS, 42 | dagrun_timeout=timedelta(hours=2), 43 | schedule_interval='30 0 * * *') as dag: 44 | 45 | job1 = KitchenOperator( 46 | dag=dag, 47 | task_id='job1', 48 | xcom_push=True, 49 | directory='/home/bi', 50 | job='test_job', 51 | params={'date': '{{ ds }}'}) 52 | 53 | trans1 = PanOperator( 54 | dag=dag, 55 | task_id='trans1', 56 | xcom_push=True, 57 | directory='/home/bi', 58 | trans='test_trans', 59 | params={'date': '{{ ds }}'}) 60 | 61 | trans2 = CarteTransOperator( 62 | dag=dag, 63 | task_id='trans2', 64 | trans='/home/bi/test_trans', 65 | params={'date': '{{ ds }}'}) 66 | 67 | job3 = CarteJobOperator( 68 | dag=dag, 69 | task_id='job3', 70 | job='/home/bi/test_job', 71 | params={'date': '{{ ds }}'}) 72 | 73 | job1 >> trans1 >> trans2 >> job3 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### PyCharm+all ### 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | bin/clear_data_collections/env 6 | 7 | 
bin/clear_merge_collection/env 8 | 9 | bin/copy_cl2_from_live_to_des/env 10 | 11 | src/merge/conf/env 12 | 13 | # CMake 14 | cmake-build-*/ 15 | 16 | # File-based project format 17 | *.iws 18 | 19 | # IntelliJ 20 | out/ 21 | 22 | # mpeltonen/sbt-idea plugin 23 | .idea_modules/ 24 | 25 | ### PyCharm+all Patch ### 26 | # Ignores the whole .idea folder and all .iml files 27 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 28 | 29 | .idea/ 30 | 31 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 32 | 33 | *.iml 34 | modules.xml 35 | .idea/misc.xml 36 | *.ipr 37 | 38 | ### Python ### 39 | # Byte-compiled / optimized / DLL files 40 | __pycache__/ 41 | *.py[cod] 42 | *$py.class 43 | 44 | # C extensions 45 | *.so 46 | 47 | # Distribution / packaging 48 | .Python 49 | build/ 50 | develop-eggs/ 51 | dist/ 52 | downloads/ 53 | eggs/ 54 | .eggs/ 55 | lib/ 56 | lib64/ 57 | parts/ 58 | sdist/ 59 | wheels/ 60 | *.egg-info/ 61 | .installed.cfg 62 | *.egg 63 | MANIFEST 64 | 65 | # PyInstaller 66 | # Usually these files are written by a python script from a template 67 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
68 | *.manifest 69 | *.spec 70 | 71 | # Installer logs 72 | pip-log.txt 73 | pip-delete-this-directory.txt 74 | 75 | # Unit test / coverage reports 76 | htmlcov/ 77 | .tox/ 78 | .nox/ 79 | .coverage 80 | .coverage.* 81 | .cache 82 | nosetests.xml 83 | coverage.xml 84 | *.cover 85 | .hypothesis/ 86 | .pytest_cache/ 87 | cover 88 | unit-coverage.xml 89 | xunit.xml 90 | 91 | 92 | 93 | # Translations 94 | *.mo 95 | *.pot 96 | 97 | # Django stuff: 98 | *.log 99 | local_settings.py 100 | db.sqlite3 101 | 102 | # Flask stuff: 103 | instance/ 104 | .webassets-cache 105 | 106 | # Scrapy stuff: 107 | .scrapy 108 | 109 | # Sphinx documentation 110 | docs/_build/ 111 | 112 | # PyBuilder 113 | target/ 114 | 115 | # Jupyter Notebook 116 | .ipynb_checkpoints 117 | 118 | # IPython 119 | profile_default/ 120 | ipython_config.py 121 | 122 | # pyenv 123 | .python-version 124 | 125 | # celery beat schedule file 126 | celerybeat-schedule 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | ### Python Patch ### 156 | .venv/ 157 | -------------------------------------------------------------------------------- /tests/integration/hooks/test_carte.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import time 18 | from unittest import TestCase 19 | 20 | from airflow_pentaho.hooks.carte import PentahoCarteHook 21 | 22 | DEFAULT_HOST = 'localhost' 23 | DEFAULT_PORT = 8880 24 | DEFAULT_REP = 'DEFAULT' 25 | DEFAULT_CARTE_USERNAME = 'cluster' 26 | DEFAULT_CARTE_PASSWORD = 'cluster' 27 | DEFAULT_REP_USERNAME = 'admin' 28 | DEFAULT_REP_PASSWORD = 'password' 29 | 30 | 31 | class TestPentahoCarteClient(TestCase): 32 | """Test Carte API REST Client""" 33 | 34 | def _get_cli(self): 35 | return PentahoCarteHook.PentahoCarteClient(DEFAULT_HOST, 36 | DEFAULT_PORT, 37 | DEFAULT_REP, 38 | DEFAULT_REP_USERNAME, 39 | DEFAULT_REP_PASSWORD, 40 | DEFAULT_CARTE_USERNAME, 41 | DEFAULT_CARTE_PASSWORD) 42 | 43 | def test_cli_constructor(self): 44 | cli = PentahoCarteHook.PentahoCarteClient(DEFAULT_HOST, 45 | DEFAULT_PORT, 46 | DEFAULT_REP, 47 | DEFAULT_REP_USERNAME, 48 | DEFAULT_REP_PASSWORD, 49 | DEFAULT_CARTE_USERNAME, 50 | DEFAULT_CARTE_PASSWORD, 51 | level='Basic') 52 | self.assertEqual(cli.host, DEFAULT_HOST) 53 | self.assertEqual(cli.port, DEFAULT_PORT) 54 | self.assertEqual(cli.rep, DEFAULT_REP) 55 | self.assertEqual(cli.username, DEFAULT_REP_USERNAME) 56 | self.assertEqual(cli.password, DEFAULT_REP_PASSWORD) 57 | self.assertEqual(cli.carte_username, DEFAULT_CARTE_USERNAME) 58 | self.assertEqual(cli.carte_password, DEFAULT_CARTE_PASSWORD) 59 | self.assertEqual(cli.host, DEFAULT_HOST) 60 | self.assertEqual(cli.level, 'Basic') 61 | 62 | def test_run_job_and_wait(self): 63 | cli = self._get_cli() 64 | rs = 
cli.run_job('/home/bi/test_job') 65 | job_id = rs['webresult']['id'] 66 | 67 | rs = {} 68 | while not rs or rs['jobstatus']['status_desc'] != 'Finished': 69 | rs = cli.job_status('test_job', job_id, rs) 70 | time.sleep(5) 71 | 72 | self.assertTrue('result' in rs['jobstatus']) 73 | 74 | def test_run_trans_and_wait(self): 75 | cli = self._get_cli() 76 | cli.run_trans('/home/bi/test_trans') 77 | 78 | rs = {} 79 | while not rs or rs['transstatus']['status_desc'] != 'Finished': 80 | rs = cli.trans_status('test_trans', rs) 81 | time.sleep(5) 82 | 83 | self.assertTrue('result' in rs['transstatus']) 84 | -------------------------------------------------------------------------------- /tests/unit/operators/test_carte_job.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from unittest import mock 18 | 19 | from airflow_pentaho.operators.carte import CarteJobOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class MockedCarteResponse: 24 | 25 | def __init__(self, response_body, status_code): 26 | self.content = response_body 27 | self.status_code = status_code 28 | 29 | 30 | def mock_requests(**kwargs): 31 | if 'executeJob' in kwargs['url']: 32 | return MockedCarteResponse(""" 33 | 34 | OK 35 | Job started 36 | f8110ea1-2283-4a65-9398-5e3ed99cd3bc 37 | """, 200) 38 | 39 | if 'jobStatus' in kwargs['url']: 40 | return MockedCarteResponse(""" 41 | 42 | dummy_job 43 | f8110ea1-2283-4a65-9398-5e3ed99cd3bc 44 | Finished 45 | 46 | 47 | 0 48 | 20 49 | 50 | 0 51 | 0 52 | 0 53 | 0 54 | 0 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | Y 61 | 0 62 | N 63 | 64 | null 65 | 66 | 67 | 68 | 69 | """, 200) 70 | 71 | 72 | class TestCarteJobOperator(OperatorTestBase): 73 | """Test Carte Job Operator""" 74 | 75 | @mock.patch('requests.get', side_effect=mock_requests) 76 | @mock.patch('requests.post', side_effect=mock_requests) 77 | def test_execute(self, mock_post, mock_get): # pylint: disable=unused-argument 78 | op = CarteJobOperator( 79 | task_id='test_carte_job_operator', 80 | xcom_push=False, 81 | job='/home/bi/test_job', 82 | level='Debug') 83 | 84 | op.execute(context={}) 85 | self.assertEqual( 86 | 'name=test_job' 87 | '&id=f8110ea1-2283-4a65-9398-5e3ed99cd3bc' 88 | '&xml=Y' 89 | '&from=0', 90 | mock_post.call_args_list[0][1]['data']) 91 | -------------------------------------------------------------------------------- /tests/unit/operators/test_carte_trans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from unittest import mock 18 | 19 | from airflow_pentaho.operators.carte import CarteTransOperator 20 | from tests.operator_test_base import OperatorTestBase 21 | 22 | 23 | class MockedCarteResponse: 24 | 25 | def __init__(self, response_body, status_code): 26 | self.content = response_body 27 | self.status_code = status_code 28 | 29 | 30 | def mock_requests(**kwargs): 31 | if 'executeTrans' in kwargs['url']: 32 | return MockedCarteResponse('', 200) 33 | 34 | if 'transStatus' in kwargs['url']: 35 | return MockedCarteResponse(""" 36 | 37 | dummy-trans 38 | c56961b2-c848-49b8-abde-76c8015e29b0 39 | Finished 40 | 41 | N 42 | 43 | Dummy (do nothing) 44 | 00 45 | 00 46 | 00 47 | 00 48 | Stopped0.0 49 | --Y 50 | N 51 | 52 | 53 | 0 54 | 37 55 | 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 0 65 | 0 66 | Y 67 | 0 68 | Y 69 | 10e2c832-07da-409a-a5ba-4b90a234e957 70 | 71 | 72 | 73 | 74 | 75 | 76 | """, 200) 77 | 78 | 79 | class TestCarteTransOperator(OperatorTestBase): 80 | """Test Carte Transformation Operator""" 81 | 82 | @mock.patch('requests.get', side_effect=mock_requests) 83 | @mock.patch('requests.post', side_effect=mock_requests) 84 | def test_execute(self, mock_post, mock_get): # pylint: disable=unused-argument 85 | op = CarteTransOperator( 86 | task_id='test_carte_trans_operator', 87 | xcom_push=False, 88 | trans='/home/bi/test_trans', 89 | level='Debug') 90 | 91 | op.execute(context={}) 92 | 93 | self.assertEqual('name=test_trans&xml=Y&from=0', 94 | mock_post.call_args_list[0][1]['data']) 95 | 
-------------------------------------------------------------------------------- /airflow_pentaho/hooks/kettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Kettle hook module""" 16 | 17 | 18 | import platform 19 | from packaging import version 20 | 21 | import airflow 22 | from airflow import AirflowException 23 | 24 | if version.parse(airflow.__version__) >= version.parse('2.0'): 25 | from airflow.hooks.base import BaseHook 26 | else: 27 | from airflow.hooks.base_hook import BaseHook 28 | 29 | 30 | class PentahoHook(BaseHook): 31 | """Implementation hook for interact with Kettle commands""" 32 | 33 | class PentahoClient: 34 | """Implementation for Commands calls""" 35 | 36 | def __init__( 37 | self, 38 | pentaho_home, 39 | rep, 40 | username, 41 | password, 42 | system, 43 | *args, 44 | **kwargs): 45 | super().__init__(*args, **kwargs) 46 | self.pentaho_home = pentaho_home 47 | self.rep = rep 48 | self.username = username 49 | self.password = password 50 | self.system = system 51 | 52 | def _get_tool_command_template(self): 53 | if self.system == 'Windows': 54 | return '{}\\{}.bat' 55 | elif self.system == 'Linux': 56 | return """{}/{}.sh""" 57 | else: 58 | raise AirflowException( 59 | "Unsupported platform for airflow_pentaho: '{}'" 60 | .format(self.system)) 61 | 62 
| def _build_tool_command(self, command): 63 | return self._get_tool_command_template().format(self.pentaho_home, 64 | command) 65 | 66 | def _get_argument_template(self): 67 | if self.system == 'Windows': 68 | return '/{}:{}' 69 | elif self.system == 'Linux': 70 | return '-{}={}' 71 | else: 72 | raise AirflowException( 73 | "Unsupported platform for airflow_pentaho: '{}'" 74 | .format(self.system)) 75 | 76 | def _build_argument(self, key, val): 77 | return self._get_argument_template().format(key, val) 78 | 79 | def _build_connection_arguments(self): 80 | line = list() 81 | line.append(self._build_argument('rep', self.rep)) 82 | line.append(self._build_argument('user', self.username)) 83 | line.append(self._build_argument('pass', self.password)) 84 | return ' '.join(line) 85 | 86 | def build_command(self, command, arguments, params): 87 | line = [self._build_tool_command(command), 88 | self._build_connection_arguments()] 89 | for k, val in arguments.items(): 90 | line.append(self._build_argument(k, val)) 91 | if params is not None: 92 | for k, val in params.items(): 93 | if version.parse(airflow.__version__) >= version.parse('2.2') and \ 94 | not isinstance(val, str): 95 | line.append(self._build_argument(f'param:{k}', val.value)) 96 | else: 97 | line.append(self._build_argument(f'param:{k}', val)) 98 | 99 | command_line = ' '.join(line) 100 | return command_line 101 | 102 | def __init__(self, conn_id='pdi_default'): 103 | if version.parse(airflow.__version__) >= version.parse('2.0'): 104 | super().__init__() 105 | else: 106 | super().__init__(None) 107 | self.conn_id = conn_id 108 | self.connection = self.get_connection(conn_id) 109 | self.extras = self.connection.extra_dejson 110 | self.pentaho_cli = None 111 | 112 | def get_conn(self): 113 | """ 114 | Provide required object to run transformations and jobs 115 | :return: 116 | """ 117 | if self.pentaho_cli: 118 | return self.pentaho_cli 119 | 120 | self.pentaho_cli = self.PentahoClient( 121 | 
self.extras.get('pentaho_home'), 122 | self.extras.get('rep'), 123 | self.connection.login, 124 | self.connection.password, 125 | platform.system()) 126 | 127 | return self.pentaho_cli 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pentaho Airflow plugin 2 | 3 | [![Build Status](https://api.travis-ci.com/damavis/airflow-pentaho-plugin.svg?branch=master)](https://app.travis-ci.com/damavis/airflow-pentaho-plugin) 4 | [![codecov](https://codecov.io/gh/damavis/airflow-pentaho-plugin/branch/master/graph/badge.svg)](https://codecov.io/gh/damavis/airflow-pentaho-plugin) 5 | [![PyPI](https://img.shields.io/pypi/v/airflow-pentaho-plugin)](https://pypi.org/project/airflow-pentaho-plugin/) 6 | [![PyPI - Downloads](https://img.shields.io/pypi/dm/airflow-pentaho-plugin)](https://pypi.org/project/airflow-pentaho-plugin/) 7 | 8 | This plugin runs Jobs and Transformations through Carte servers. 9 | It allows orchestrating a massive number of trans/jobs, taking care 10 | of the dependencies between them, even between different instances. 11 | This is done by using `CarteJobOperator` and `CarteTransOperator`. 12 | 13 | It also runs Pan (transformations) and Kitchen (Jobs) in local mode, 14 | both from repository and local XML files. For this approach, use 15 | `KitchenOperator` and `PanOperator`. 16 | 17 | ## Requirements 18 | 19 | 1. An Apache Airflow system deployed. 20 | 2. One or many working PDI CE installations. 21 | 3. A Carte server for Carte Operators. 22 | 23 | ## Setup 24 | 25 | The same setup process must be performed on webserver, scheduler 26 | and workers (that run these tasks) to get it working. If you want to 27 | deploy specific workers to run this kind of task, see 28 | [Queues](https://airflow.apache.org/docs/stable/concepts.html#queues), 29 | in **Airflow** *Concepts* section.
30 | 31 | ### Pip package 32 | 33 | First of all, the package should be installed via `pip install` command. 34 | 35 | ```bash 36 | pip install airflow-pentaho-plugin 37 | ``` 38 | 39 | ### Airflow connection 40 | 41 | Then, a new connection needs to be added to Airflow Connections. To do this, 42 | go to Airflow web UI, and click on `Admin -> Connections` on the top menu. 43 | Now, click on `Create` tab. 44 | 45 | Use HTTP connection type. Enter the **Conn Id**, this plugin uses `pdi_default` 46 | by default, the username and the password for your Pentaho Repository. 47 | 48 | At the bottom of the form, fill the **Extra** field with `pentaho_home`, the 49 | path where your pdi-ce is placed, and `rep`, the repository name for this 50 | connection, using a JSON-formatted string as follows. 51 | 52 | ```json 53 | { 54 | "pentaho_home": "/opt/pentaho", 55 | "rep": "Default" 56 | } 57 | ``` 58 | 59 | ### Carte 60 | 61 | In order to use `CarteJobOperator`, the connection should be set differently. Fill 62 | `host` (including `http://` or `https://`) and `port` for Carte hostname and port, 63 | `username` and `password` for PDI repository, and `extra` as follows. 64 | 65 | ```json 66 | { 67 | "rep": "Default", 68 | "carte_username": "cluster", 69 | "carte_password": "cluster" 70 | } 71 | ``` 72 | 73 | ## Usage 74 | 75 | ### CarteJobOperator 76 | 77 | CarteJobOperator is responsible for running jobs in remote slave servers. Here 78 | is an example of `CarteJobOperator` usage. 79 | 80 | ```python 81 | # For versions before 2.0 82 | # from airflow.operators.airflow_pentaho import CarteJobOperator 83 | 84 | from airflow_pentaho.operators.carte import CarteJobOperator 85 | 86 | # ... # 87 | 88 | # Define the task using the CarteJobOperator 89 | avg_spent = CarteJobOperator( 90 | conn_id='pdi_default', 91 | task_id="average_spent", 92 | job="/home/bi/average_spent", 93 | params={"date": "{{ ds }}"}, # Date in yyyy-mm-dd format 94 | dag=dag) 95 | 96 | # ...
# 97 | 98 | some_task >> avg_spent >> another_task 99 | ``` 100 | 101 | ### KitchenOperator 102 | 103 | Kitchen operator is responsible for running Jobs. Let's suppose that we have 104 | a defined *Job* saved on `/home/bi/average_spent` in our repository with 105 | the argument `date` as input parameter. Let's define the task using the 106 | `KitchenOperator`. 107 | 108 | ```python 109 | # For versions before 2.0 110 | # from airflow.operators.airflow_pentaho import KitchenOperator 111 | 112 | from airflow_pentaho.operators.kettle import KitchenOperator 113 | 114 | # ... # 115 | 116 | # Define the task using the KitchenOperator 117 | avg_spent = KitchenOperator( 118 | conn_id='pdi_default', 119 | queue="pdi", 120 | task_id="average_spent", 121 | directory="/home/bi", 122 | job="average_spent", 123 | params={"date": "{{ ds }}"}, # Date in yyyy-mm-dd format 124 | dag=dag) 125 | 126 | # ... # 127 | 128 | some_task >> avg_spent >> another_task 129 | ``` 130 | 131 | ### CarteTransOperator 132 | 133 | CarteTransOperator is responsible for running transformations in remote slave 134 | servers. Here is an example of `CarteTransOperator` usage. 135 | 136 | ```python 137 | # For versions before 2.0 138 | # from airflow.operators.airflow_pentaho import CarteTransOperator 139 | 140 | from airflow_pentaho.operators.carte import CarteTransOperator 141 | 142 | # ... # 143 | 144 | # Define the task using the CarteTransOperator 145 | enrich_customers = CarteTransOperator( 146 | conn_id='pdi_default', 147 | task_id="enrich_customer_data", 148 | trans="/home/bi/enrich_customer_data", 149 | params={"date": "{{ ds }}"}, # Date in yyyy-mm-dd format 150 | dag=dag) 151 | 152 | # ... # 153 | 154 | some_task >> enrich_customers >> another_task 155 | ``` 156 | 157 | ### PanOperator 158 | 159 | Pan operator is responsible for running transformations. Let's suppose that 160 | we have one saved on `/home/bi/clean_somedata`. Let's define the task using the 161 | `PanOperator`.
In this case, the transformation receives a parameter that 162 | determines the file to be cleaned. 163 | 164 | ```python 165 | # For versions before 2.0 166 | # from airflow.operators.airflow_pentaho import PanOperator 167 | 168 | from airflow_pentaho.operators.kettle import PanOperator 169 | 170 | # ... # 171 | 172 | # Define the task using the PanOperator 173 | clean_input = PanOperator( 174 | conn_id='pdi_default', 175 | queue="pdi", 176 | task_id="cleanup", 177 | directory="/home/bi", 178 | trans="clean_somedata", 179 | params={"file": "/tmp/input_data/{{ ds }}/sells.csv"}, 180 | dag=dag) 181 | 182 | # ... # 183 | 184 | some_task >> clean_input >> another_task 185 | ``` 186 | 187 | For more information, please see `sample_dags/pdi_flow.py` 188 | -------------------------------------------------------------------------------- /tests/unit/hooks/test_kettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from packaging import version 17 | from unittest import TestCase 18 | 19 | import airflow 20 | from airflow import AirflowException 21 | 22 | from airflow_pentaho.hooks.kettle import PentahoHook 23 | 24 | if version.parse(airflow.__version__) >= version.parse('2.2'): 25 | from airflow.models.param import Param # pylint: disable=ungrouped-imports 26 | 27 | WINDOWS_PDI_HOME = 'C:\\pentaho' # noqa: W605 28 | 29 | DEFAULT_HOME = '/opt/pentaho' 30 | DEFAULT_REP = 'test_repository' 31 | DEFAULT_USERNAME = 'test' 32 | DEFAULT_PASSWORD = 'secret' 33 | 34 | 35 | class TestPentahoClient(TestCase): 36 | """Testing Kettle commands (pan and kitchen) client.""" 37 | 38 | def _get_linux_client(self): 39 | return PentahoHook.PentahoClient(DEFAULT_HOME, 40 | DEFAULT_REP, 41 | DEFAULT_USERNAME, 42 | DEFAULT_PASSWORD, 43 | 'Linux') 44 | 45 | def _get_windows_client(self): 46 | return PentahoHook.PentahoClient(WINDOWS_PDI_HOME, 47 | DEFAULT_REP, 48 | DEFAULT_USERNAME, 49 | DEFAULT_PASSWORD, 50 | 'Windows') 51 | 52 | def test__get_tool_command_template_linux(self): 53 | cli = self._get_linux_client() 54 | tmpl = cli._get_tool_command_template() # pylint: disable=protected-access 55 | self.assertEqual(tmpl, '{}/{}.sh') 56 | 57 | def test__get_tool_command_template_windows(self): 58 | cli = self._get_windows_client() 59 | tmpl = cli._get_tool_command_template() # pylint: disable=protected-access 60 | self.assertEqual(tmpl, '{}\\{}.bat') 61 | 62 | def test__get_tool_command_template_unknown(self): 63 | cli = PentahoHook.PentahoClient(DEFAULT_HOME, 64 | DEFAULT_REP, 65 | DEFAULT_USERNAME, 66 | DEFAULT_PASSWORD, 67 | '') 68 | with self.assertRaises(AirflowException) as context: 69 | cli._get_tool_command_template() # pylint: disable=protected-access 70 | 71 | self.assertTrue('Unsupported platform' 72 | in str(context.exception)) 73 | 74 | def test__build_tool_command_linux(self): 75 | cli = self._get_linux_client() 76 | tmpl = cli._build_tool_command('pan') # pylint: 
disable=protected-access 77 | self.assertEqual(tmpl, '/opt/pentaho/pan.sh') 78 | 79 | def test__build_tool_command_windows(self): 80 | cli = self._get_windows_client() 81 | tmpl = cli._build_tool_command('pan') # pylint: disable=protected-access 82 | self.assertEqual(tmpl, 'C:\\pentaho\\pan.bat') # noqa: W605 83 | 84 | def test__get_argument_template_linux(self): 85 | cli = self._get_linux_client() 86 | tmpl = cli._get_argument_template() # pylint: disable=protected-access 87 | self.assertEqual(tmpl, '-{}={}') 88 | 89 | def test__get_argument_template_windows(self): 90 | cli = self._get_windows_client() 91 | tmpl = cli._get_argument_template() # pylint: disable=protected-access 92 | self.assertEqual(tmpl, '/{}:{}') 93 | 94 | def test__build_argument_linux(self): 95 | cli = self._get_linux_client() 96 | tmpl = cli._build_argument('key', 'value') # pylint: disable=protected-access 97 | self.assertEqual(tmpl, '-key=value') 98 | 99 | def test__build_argument_windows(self): 100 | cli = self._get_windows_client() 101 | tmpl = cli._build_argument('key', 'value') # pylint: disable=protected-access 102 | self.assertEqual(tmpl, '/key:value') 103 | 104 | def test__build_connection_arguments(self): 105 | cli = self._get_linux_client() 106 | tmpl = cli._build_connection_arguments() # pylint: disable=protected-access 107 | self.assertEqual(tmpl, '-rep=test_repository -user=test -pass=secret') 108 | 109 | def test_build_command(self): 110 | cli = self._get_linux_client() # pylint: disable=protected-access 111 | tmpl = cli.build_command('pan', {'trans': 'test'}, {'version': '3'}) 112 | self.assertEqual(tmpl, '/opt/pentaho/pan.sh -rep=test_repository' 113 | ' -user=test -pass=secret' 114 | ' -trans=test' 115 | ' -param:version=3') 116 | 117 | def test_params_command(self): 118 | cli = self._get_linux_client() 119 | if version.parse(airflow.__version__) >= version.parse('2.2'): 120 | tmpl = cli.build_command('pan', 121 | {'trans': 'test'}, 122 | {'version': Param(5, type='integer', 
minimum=3)}) 123 | else: 124 | tmpl = cli.build_command('pan', {'trans': 'test'}, {'version': 5}) 125 | self.assertEqual(tmpl, '/opt/pentaho/pan.sh -rep=test_repository' 126 | ' -user=test -pass=secret' 127 | ' -trans=test' 128 | ' -param:version=5') 129 | 130 | def test_empty_params_command(self): 131 | cli = self._get_linux_client() 132 | tmpl = cli.build_command('pan', {'trans': 'test'}, None) 133 | self.assertEqual(tmpl, '/opt/pentaho/pan.sh -rep=test_repository' 134 | ' -user=test -pass=secret' 135 | ' -trans=test' 136 | '') 137 | -------------------------------------------------------------------------------- /airflow_pentaho/hooks/carte.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Carte hook module""" 16 | 17 | 18 | import requests 19 | import xmltodict 20 | from packaging import version 21 | from urllib.parse import urlencode 22 | 23 | from requests.auth import HTTPBasicAuth 24 | 25 | import airflow 26 | 27 | from airflow import AirflowException 28 | if version.parse(airflow.__version__) >= version.parse('2.0'): 29 | from airflow.hooks.base import BaseHook 30 | else: 31 | from airflow.hooks.base_hook import BaseHook 32 | 33 | 34 | class PentahoCarteHook(BaseHook): 35 | """Implementation hook for interact with Carte Rest API""" 36 | 37 | class PentahoCarteClient: 38 | """Implementation for Carte calls""" 39 | 40 | RUN_JOB_ENDPOINT = '/kettle/executeJob/' 41 | JOB_STATUS_ENDPOINT = '/kettle/jobStatus/' 42 | RUN_TRANS_ENDPOINT = '/kettle/executeTrans/' 43 | TRANS_STATUS_ENDPOINT = '/kettle/transStatus/' 44 | 45 | def __init__( 46 | self, 47 | host, 48 | port, 49 | rep, 50 | username, 51 | password, 52 | carte_username, 53 | carte_password, 54 | *args, 55 | level='Basic', 56 | **kwargs): 57 | super().__init__(*args, **kwargs) 58 | self.host = host 59 | if not self.host.startswith('http'): 60 | self.host = 'http://{}'.format(self.host) 61 | self.port = port 62 | self.rep = rep 63 | self.username = username 64 | self.password = password 65 | self.carte_username = carte_username 66 | self.carte_password = carte_password 67 | self.level = level 68 | 69 | def __get_url(self, endpoint): 70 | return '{}:{}{}'.format(self.host, self.port, endpoint) 71 | 72 | def __get_auth(self): 73 | return HTTPBasicAuth(self.carte_username, self.carte_password) 74 | 75 | def job_status(self, job_name, job_id, previous_response=None): 76 | url = self.__get_url(self.JOB_STATUS_ENDPOINT) 77 | headers = {'Content-Type': 'application/x-www-form-urlencoded'} 78 | 79 | from_line = previous_response['jobstatus']['last_log_line_nr'] \ 80 | if previous_response \ 81 | else 0 82 | 83 | payload = { 84 | 'name': job_name, 85 | 'id': job_id, 86 | 'xml': 'Y', 87 | 'from': 
from_line 88 | } 89 | 90 | rs = requests.post(url=url, headers=headers, 91 | data=urlencode(payload), auth=self.__get_auth()) 92 | if rs.status_code >= 400: 93 | result = xmltodict.parse(rs.content) 94 | raise AirflowException('{}: {}'.format( 95 | result['webresult']['result'], 96 | result['webresult']['message']) 97 | ) 98 | else: 99 | return xmltodict.parse(rs.content) 100 | 101 | def run_job(self, job_path, params=None): 102 | url = self.__get_url(self.RUN_JOB_ENDPOINT) 103 | args = { 104 | 'user': self.username, 105 | 'pass': self.password, 106 | 'rep': self.rep, 107 | 'job': job_path, 108 | 'level': self.level 109 | } 110 | 111 | if params is not None: 112 | for k, val in params.items(): 113 | if version.parse(airflow.__version__) >= version.parse('2.2') and \ 114 | not isinstance(val, str): 115 | args[k] = val.value 116 | else: 117 | args[k] = val 118 | 119 | rs = requests.get(url=url, params=args, auth=self.__get_auth()) 120 | if rs.status_code >= 400: 121 | result = xmltodict.parse(rs.content) 122 | raise AirflowException('{}: {}'.format( 123 | result['webresult']['result'], 124 | result['webresult']['message']) 125 | ) 126 | else: 127 | return xmltodict.parse(rs.content) 128 | 129 | def trans_status(self, trans_name, trans_id=None, previous_response=None): 130 | url = self.__get_url(self.TRANS_STATUS_ENDPOINT) 131 | headers = {'Content-Type': 'application/x-www-form-urlencoded'} 132 | 133 | from_line = previous_response['transstatus']['last_log_line_nr'] \ 134 | if previous_response \ 135 | else 0 136 | 137 | payload = { 138 | 'name': trans_name, 139 | 'xml': 'Y', 140 | 'from': from_line 141 | } 142 | if trans_id: 143 | payload['id'] = trans_id 144 | 145 | rs = requests.post(url=url, headers=headers, 146 | data=urlencode(payload), auth=self.__get_auth()) 147 | if rs.status_code >= 400: 148 | result = xmltodict.parse(rs.content) 149 | raise AirflowException('{}: {}'.format( 150 | result['webresult']['result'], 151 | result['webresult']['message']) 152 | ) 
153 | else: 154 | return xmltodict.parse(rs.content) 155 | 156 | def run_trans(self, trans_path, params=None): 157 | url = self.__get_url(self.RUN_TRANS_ENDPOINT) 158 | args = { 159 | 'user': self.username, 160 | 'pass': self.password, 161 | 'rep': self.rep, 162 | 'trans': trans_path, 163 | 'level': self.level 164 | } 165 | 166 | if params is not None: 167 | for k, val in params.items(): 168 | if version.parse(airflow.__version__) >= version.parse('2.2') and \ 169 | not isinstance(val, str): 170 | args[k] = val.value 171 | else: 172 | args[k] = val 173 | 174 | rs = requests.get(url=url, params=args, auth=self.__get_auth()) 175 | if rs.status_code >= 400: 176 | raise AirflowException(rs.content) 177 | 178 | def __init__(self, conn_id='pdi_default', level='Basic'): 179 | if version.parse(airflow.__version__) >= version.parse('2.0'): 180 | super().__init__() 181 | else: 182 | super().__init__(None) 183 | self.conn_id = conn_id 184 | self.level = level 185 | self.connection = self.get_connection(conn_id) 186 | self.extras = self.connection.extra_dejson 187 | self.pentaho_cli = None 188 | 189 | def get_conn(self): 190 | """ 191 | Provide required object to run jobs on Carte 192 | :return: 193 | """ 194 | if self.pentaho_cli: 195 | return self.pentaho_cli 196 | 197 | self.pentaho_cli = self.PentahoCarteClient( 198 | host=self.connection.host, 199 | port=self.connection.port, 200 | rep=self.extras.get('rep'), 201 | username=self.connection.login, 202 | password=self.connection.password, 203 | carte_username=self.extras.get('carte_username'), 204 | carte_password=self.extras.get('carte_password'), 205 | level=self.level) 206 | 207 | return self.pentaho_cli 208 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/carte.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, 
Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Carte Base Operator module""" 16 | 17 | 18 | import base64 19 | import json 20 | import re 21 | import time 22 | import zlib 23 | 24 | from airflow import AirflowException 25 | from airflow.models import BaseOperator 26 | 27 | from airflow_pentaho.hooks.carte import PentahoCarteHook 28 | 29 | XCOM_RETURN_KEY = 'return_value' 30 | 31 | class CarteBaseOperator(BaseOperator): 32 | """Carte Base Operator""" 33 | 34 | FINISHED_STATUSES = ['Finished'] 35 | ERRORS_STATUSES = [ 36 | 'Stopped', 37 | 'Finished (with errors)', 38 | 'Stopped (with errors)' 39 | ] 40 | END_STATUSES = FINISHED_STATUSES + ERRORS_STATUSES 41 | 42 | DEFAULT_CONN_ID = 'pdi_default' 43 | 44 | template_fields = ('task_params',) 45 | 46 | def _log_logging_string(self, raw_logging_string): 47 | logs = raw_logging_string 48 | cdata = re.match(r'\<\!\[CDATA\[([^\]]+)\]\]\>', logs) 49 | cdata = cdata.group(1) if cdata else raw_logging_string 50 | decoded_lines = zlib.decompress(base64.b64decode(cdata), 51 | 16 + zlib.MAX_WBITS) 52 | err_count = 0 53 | output_line = '' 54 | if decoded_lines: 55 | for line in re.compile(r'\r\n|\n|\r').split( 56 | decoded_lines.decode('utf-8')): 57 | if "error" in line.lower(): 58 | err_count += 1 59 | self.log.info("Errors: %s", err_count) 60 | self.log.info(line) 61 | if len(line)>0: 62 | output_line = line 63 | return output_line, err_count 64 | 65 | 66 | class CarteJobOperator(CarteBaseOperator): 67 | """Carte Job operator. 
Runs job on Carte service.""" 68 | 69 | LOG_TEMPLATE = '%s: %s, with id %s' 70 | 71 | def __init__(self, 72 | *args, 73 | job=None, 74 | params=None, 75 | pdi_conn_id=None, 76 | level='Basic', 77 | xcom_push=False, 78 | **kwargs): 79 | """ 80 | Execute a Job in a remote Carte server from a PDI repository. 81 | :param job: The full path of the job 82 | :type job: str 83 | :param params: Set a named parameter in a dict as input parameters. 84 | :type params: dict 85 | :param pdi_conn_id: Pentaho Data Integration connection ID. 86 | :type pdi_conn_id: str 87 | :param level: The logging level (Basic, Detailed, Debug, Rowlevel, 88 | Error, Nothing), default is Basic. 89 | :type level: str 90 | """ 91 | super().__init__(*args, **kwargs) 92 | 93 | self.xcom_push_flag = xcom_push 94 | self.pdi_conn_id = pdi_conn_id 95 | if not self.pdi_conn_id: 96 | self.pdi_conn_id = self.DEFAULT_CONN_ID 97 | self.job = job 98 | self.level = level 99 | self.task_params = params 100 | 101 | def _get_pentaho_carte_client(self): 102 | return PentahoCarteHook(conn_id=self.pdi_conn_id, 103 | level=self.level).get_conn() 104 | 105 | def _get_job_name(self): 106 | return self.job.split('/').pop().replace('.kjb', '') 107 | 108 | def execute(self, context): # pylint: disable=unused-argument 109 | conn = self._get_pentaho_carte_client() 110 | 111 | exec_job_rs = conn.run_job(self.job, self.task_params) 112 | message = exec_job_rs['webresult']['message'] 113 | job_id = exec_job_rs['webresult']['id'] 114 | self.log.info('%s: %s, with id %s', message, self.job, job_id) 115 | 116 | status_job_rs = None 117 | status = None 118 | status_desc = None 119 | while not status_job_rs or status_desc not in self.END_STATUSES: 120 | status_job_rs = conn.job_status(self._get_job_name(), job_id, 121 | status_job_rs) 122 | if 'jobstatus' not in status_job_rs: 123 | raise AirflowException( 124 | 'Unexpected server response: ' + json.dumps(status_job_rs)) 125 | 126 | status = status_job_rs['jobstatus'] 127 | 
status_desc = status['status_desc'] 128 | self.log.info(self.LOG_TEMPLATE, status_desc, self.job, job_id) 129 | output, err_count = self._log_logging_string(status['logging_string']) 130 | 131 | if status_desc not in self.END_STATUSES: 132 | self.log.info('Sleeping 5 seconds before ask again') 133 | time.sleep(5) 134 | 135 | if self.xcom_push_flag: 136 | self.xcom_push(context, key=XCOM_RETURN_KEY, value=output) 137 | self.xcom_push(context, key='err_count', value=err_count) 138 | 139 | if 'error_desc' in status and status['error_desc']: 140 | self.log.error(self.LOG_TEMPLATE, status['error_desc'], 141 | self.job, job_id) 142 | raise AirflowException(status['error_desc']) 143 | 144 | if status_desc in self.ERRORS_STATUSES: 145 | self.log.error(self.LOG_TEMPLATE, status['status_desc'], 146 | self.job, job_id) 147 | raise AirflowException(status['status_desc']) 148 | 149 | 150 | class CarteTransOperator(CarteBaseOperator): 151 | """Cart Transformation operator. Runs job on Carte service.""" 152 | 153 | LOG_TEMPLATE = '%s: %s' 154 | 155 | def __init__(self, 156 | *args, 157 | trans=None, 158 | params=None, 159 | pdi_conn_id=None, 160 | level='Basic', 161 | xcom_push=False, 162 | **kwargs): 163 | """ 164 | Execute a Transformation in a remote Carte server from a PDI 165 | repository. 166 | :param trans: The full path of the transformation. 167 | :type trans: str 168 | :param params: Set a named parameter in a dict as input parameters. 169 | :type params: dict 170 | :param pdi_conn_id: Pentaho Data Integration connection ID. 171 | :type pdi_conn_id: str 172 | :param level: The logging level (Basic, Detailed, Debug, Rowlevel, 173 | Error, Nothing), default is Basic. 
174 | :type level: str 175 | """ 176 | super().__init__(*args, **kwargs) 177 | 178 | self.xcom_push_flag = xcom_push 179 | self.pdi_conn_id = pdi_conn_id 180 | if not self.pdi_conn_id: 181 | self.pdi_conn_id = self.DEFAULT_CONN_ID 182 | self.trans = trans 183 | self.level = level 184 | self.task_params = params 185 | 186 | def _get_pentaho_carte_client(self): 187 | return PentahoCarteHook(conn_id=self.pdi_conn_id, 188 | level=self.level).get_conn() 189 | 190 | def _get_trans_name(self): 191 | return self.trans.split('/').pop().replace('.ktr', '') 192 | 193 | def execute(self, context): # pylint: disable=unused-argument 194 | conn = self._get_pentaho_carte_client() 195 | 196 | conn.run_trans(self.trans, self.task_params) 197 | self.log.info('Executing {}'.format(self.trans)) 198 | 199 | status_trans_rs = None 200 | status = None 201 | status_desc = None 202 | trans_id = None 203 | while not status_trans_rs or status_desc not in self.END_STATUSES: 204 | status_trans_rs = conn.trans_status(self._get_trans_name(), 205 | trans_id, 206 | status_trans_rs) 207 | if 'transstatus' not in status_trans_rs: 208 | raise AirflowException( 209 | 'Unexpected server response: ' + json.dumps(status_trans_rs)) 210 | 211 | status = status_trans_rs['transstatus'] 212 | if 'id' in status: 213 | trans_id = status['id'] 214 | status_desc = status['status_desc'] 215 | self.log.info(self.LOG_TEMPLATE, status_desc, self.trans) 216 | output, err_count = self._log_logging_string(status['logging_string']) 217 | 218 | if status_desc not in self.END_STATUSES: 219 | self.log.info('Sleeping 5 seconds before ask again') 220 | time.sleep(5) 221 | 222 | if self.xcom_push_flag: 223 | self.xcom_push(context, key=XCOM_RETURN_KEY, value=output) 224 | self.xcom_push(context, key='err_count', value=err_count) 225 | 226 | if 'error_desc' in status and status['error_desc']: 227 | self.log.error(self.LOG_TEMPLATE, status['error_desc'], self.trans) 228 | raise AirflowException(status['error_desc']) 229 | 230 | 
if status_desc in self.ERRORS_STATUSES: 231 | self.log.error(self.LOG_TEMPLATE, status['status_desc'], self.trans) 232 | raise AirflowException(status['status_desc']) 233 | 234 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2020 Aneior Studio, SL 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /airflow_pentaho/operators/kettle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 Aneior Studio, SL 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
"""kettle Operator module"""


import os
import re
import signal
from subprocess import Popen, PIPE, STDOUT
from tempfile import NamedTemporaryFile
from tempfile import TemporaryDirectory

import psutil
from airflow import AirflowException
from airflow.models import BaseOperator

from airflow_pentaho.hooks.kettle import PentahoHook

XCOM_RETURN_KEY = 'return_value'


class PDIBaseOperator(BaseOperator):
    """PDIBaseOperator is responsible to run commands and track logging.

    Subclasses build ``self.command_line`` and set ``self.codes_map`` to a
    mapping of process exit codes to human readable messages before calling
    ``_run_command()``.
    """

    DEFAULT_CONN_ID = 'pdi_default'

    def __init__(
            self,
            task_id=None,
            xcom_push=False,
            **kwargs):
        """
        :param task_id: Airflow task id.
        :type task_id: str
        :param xcom_push: If True, push the last log line and the error
            count to XCom after execution.
        :type xcom_push: bool
        """
        super().__init__(task_id=task_id, **kwargs)
        self.sub_process = None
        self.xcom_push_flag = xcom_push
        # Built by subclasses before _run_command() is called.
        self.command_line = None
        # Exit-code -> message mapping, set by subclasses (STATUS_CODES).
        self.codes_map: dict = dict()

    def _run_command(self):
        """Run ``self.command_line`` through bash, streaming its output.

        :return: a tuple ``(last_log_line, error_count)``.
        :raises AirflowException: when the process exits with a non-zero
            return code.
        """

        with TemporaryDirectory(prefix='airflowtmp') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f:

                f.write(bytes(self.command_line, 'utf_8'))
                f.flush()
                fname = f.name
                script_location = os.path.abspath(fname)
                self.log.info(
                    'Temporary script location: %s',
                    script_location
                )

                def pre_exec():
                    # Restore default signal disposition and invoke setsid
                    for sig in ('SIGPIPE', 'SIGXFZ', 'SIGXFSZ'):
                        if hasattr(signal, sig):
                            signal.signal(getattr(signal, sig), signal.SIG_DFL)
                    os.setsid()

                command_line_log = PDIBaseOperator._hide_sensitive_data(
                    self.command_line)
                self.log.info('Running PDI: %s', command_line_log)
                # FIX: pre_exec was defined but never passed to Popen (the
                # `pylint: disable=W1509` marker shows it was intended);
                # without it the child inherited Airflow's signal
                # dispositions and session instead of getting its own.
                self.sub_process = Popen(  # pylint: disable=W1509
                    ['bash', fname],
                    stdout=PIPE,
                    stderr=STDOUT,
                    cwd=tmp_dir,
                    preexec_fn=pre_exec)

                self.log.info('Output:')
                err_count = 0
                line = ''
                for line in iter(self.sub_process.stdout.readline, b''):
                    line = line.decode('utf-8').rstrip()
                    if 'error' in line.lower():
                        err_count += 1
                        self.log.info('Errors: %s', err_count)
                    self.log.info(line)
                self.sub_process.wait()

                # FIX: .get() avoids a KeyError that masked the real failure
                # when the exit code is not in codes_map (e.g. a negative
                # code after the process is killed by a signal, or bash's
                # own 126/127 codes).
                message = self.codes_map.get(
                    self.sub_process.returncode,
                    'Unknown status code')
                # FIX: keep the message out of the format string so a
                # literal '%' in it cannot break the logging call.
                self.log.info(
                    'Status Code %s: %s',
                    self.sub_process.returncode,
                    message
                )

                if self.sub_process.returncode:
                    raise AirflowException(message)

        return line, err_count

    @staticmethod
    def _hide_sensitive_data(text):
        """Strip ``-pass=...`` / ``/pass:...`` credentials from a command line."""
        return re.sub(r'(-|/)pass(=|:)([^\s]+)', '', text)

    def on_kill(self):
        """Terminate the PDI process tree, escalating to SIGKILL if needed."""
        if self.sub_process and hasattr(self.sub_process, 'pid'):
            self.log.info('Sending SIGTERM signal to PDI process %s', self.sub_process.pid)

            # Get process
            parent = psutil.Process(self.sub_process.pid)

            # Terminate children first, then the parent.
            child_processes = parent.children(recursive=True)
            for child in child_processes:
                child.terminate()
            parent.terminate()

            # Give the children up to 10 seconds to exit gracefully.
            _, alive = psutil.wait_procs(child_processes, timeout=10)

            # Kill whatever survived.
            # NOTE(review): parent.kill() is unconditional; if the parent
            # already exited psutil may raise NoSuchProcess here — confirm
            # whether that is acceptable during task shutdown.
            for p in alive:
                p.kill()
            parent.kill()


class PanOperator(PDIBaseOperator):
    """PanOperator runs pan.sh and tracks logging."""

    STATUS_CODES = {
        0: 'The transformation ran without a problem.',
        1: 'Errors occurred during processing',
        2: 'An unexpected error occurred during loading / running of the'
           ' transformation',
        3: 'Unable to prepare and initialize this transformation',
        7: "The transformation couldn't be loaded from XML or the Repository",
        8: 'Error loading steps or plugins (error in loading one of the'
           ' plugins mostly)',
        9: 'Command line usage printing'
    }

    template_fields = ('task_params',)

    def __init__(self,
                 task_id=None,
                 trans=None,
                 params=None,
                 directory=None,
                 file=None,
                 pdi_conn_id=None,
                 level='Basic',
                 logfile='/dev/stdout',
                 safemode=False,
                 maxloglines=0,
                 maxlogtimeout=0,
                 **kwargs):
        """
        Execute a Pan command (Pentaho Transformation). Pan runs
        transformations, either from a PDI repository (database
        or enterprise), or from a local file.

        :param trans: The name of the transformation (as it appears in
            the repository) to launch
        :type trans: str
        :param params: Set a named parameter in a dict as input parameters.
        :type params: dict
        :param directory: The repository directory that contains the
            transformation, including the leading slash.
        :param file: If you are calling a local KTR file, this is the filename,
            including the path (abspath).
        :type file: str
        :param pdi_conn_id: Pentaho Data Integration connection ID.
        :type pdi_conn_id: str
        :param level: The logging level (Basic, Detailed, Debug, Rowlevel,
            Error, Nothing), default is Basic.
        :type level: str
        :param logfile: A local filename to write log output to.
        :type logfile: str
        :param safemode: Runs in safe mode, which enables extra checking.
        :type safemode: bool
        :param maxloglines: The maximum number of log lines that are kept
            internally by PDI. Set to 0 to keep all rows (default)
        :type maxloglines: int
        :param maxlogtimeout: The maximum age (in minutes) of a log line while
            being kept internally by PDI. Set to 0 to keep all rows
            indefinitely (default)
        :type maxlogtimeout: int
        """
        super().__init__(task_id=task_id, **kwargs)

        self.pdi_conn_id = pdi_conn_id
        if not self.pdi_conn_id:
            self.pdi_conn_id = self.DEFAULT_CONN_ID
        self.dir = directory
        self.file = file
        self.trans = trans
        self.level = level
        self.logfile = logfile
        self.safemode = safemode
        self.task_params = params
        self.maxloglines = maxloglines
        self.maxlogtimeout = maxlogtimeout
        self.codes_map = self.STATUS_CODES

    def _get_pentaho_client(self):
        """Return the PDI client for ``self.pdi_conn_id``."""
        return PentahoHook(self.pdi_conn_id).get_conn()

    def execute(self, context):  # pylint: disable=unused-argument
        """Build and run the pan command; optionally push results to XCom."""
        conn = self._get_pentaho_client()

        arguments = {
            'dir': self.dir,
            'trans': self.trans,
            'level': self.level,
            'logfile': self.logfile,
            'safemode': 'true' if self.safemode else 'false',
            'maxloglines': str(self.maxloglines),
            'maxlogtimeout': str(self.maxlogtimeout)
        }
        if self.file:
            # A local file run bypasses the repository ('norep').
            arguments.update({'file': self.file})
            arguments.update({'norep': 'true'})

        self.command_line = conn.build_command('pan', arguments, self.task_params)
        output, err_count = self._run_command()

        if self.xcom_push_flag:
            self.xcom_push(context, key=XCOM_RETURN_KEY, value=output)
            self.xcom_push(context, key='err_count', value=err_count)
        return output


class KitchenOperator(PDIBaseOperator):
    """KitchenOperator runs kitchen.sh and tracks logging."""

    STATUS_CODES = {
        0: 'The job ran without a problem.',
        1: 'Errors occurred during processing',
        2: 'An unexpected error occurred during loading or running of the job',
        7: "The job couldn't be loaded from XML or the Repository",
        8: 'Error loading steps or plugins (error in loading one of the'
           ' plugins mostly)',
        9: 'Command line usage printing'
    }

    template_fields = ('task_params',)

    def __init__(self,
                 task_id=None,
                 job=None,
                 params=None,
                 directory=None,
                 file=None,
                 pdi_conn_id=None,
                 level='Basic',
                 logfile='/dev/stdout',
                 safemode=False,
                 maxloglines=0,
                 maxlogtimeout=0,
                 **kwargs):
        """
        Execute a Kitchen command (Pentaho Job). Kitchen runs jobs, either from
        a PDI repository (database or enterprise), or from a local file.

        :param job: The name of the job (as it appears in the repository) to
            launch
        :type job: str
        :param params: Set a named parameter in a dict as input parameters.
        :type params: dict
        :param directory: The repository directory that contains the
            transformation, including the leading slash.
        :param file: If you are calling a local KJB file, this is the filename,
            including the path (abspath).
        :type file: str
        :param pdi_conn_id: Pentaho Data Integration connection ID.
        :type pdi_conn_id: str
        :param level: The logging level (Basic, Detailed, Debug, Rowlevel,
            Error, Nothing), default is Basic.
        :type level: str
        :param logfile: A local filename to write log output to.
        :type logfile: str
        :param safemode: Runs in safe mode, which enables extra checking.
        :type safemode: bool
        :param maxloglines: The maximum number of log lines that are kept
            internally by PDI. Set to 0 to keep all rows (default)
        :type maxloglines: int
        :param maxlogtimeout: The maximum age (in minutes) of a log line while
            being kept internally by PDI. Set to 0 to keep all rows
            indefinitely (default)
        :type maxlogtimeout: int
        """
        super().__init__(task_id=task_id, **kwargs)

        self.pdi_conn_id = pdi_conn_id
        if not self.pdi_conn_id:
            self.pdi_conn_id = self.DEFAULT_CONN_ID
        self.dir = directory
        self.file = file
        self.job = job
        self.level = level
        self.logfile = logfile
        self.safemode = safemode
        self.task_params = params
        self.maxloglines = maxloglines
        self.maxlogtimeout = maxlogtimeout
        self.codes_map = self.STATUS_CODES

    def _get_pentaho_client(self):
        """Return the PDI client for ``self.pdi_conn_id``."""
        return PentahoHook(self.pdi_conn_id).get_conn()

    def execute(self, context):  # pylint: disable=unused-argument
        """Build and run the kitchen command; optionally push results to XCom."""
        conn = self._get_pentaho_client()

        arguments = {
            'dir': self.dir,
            'job': self.job,
            'level': self.level,
            'logfile': self.logfile,
            'safemode': 'true' if self.safemode else 'false',
            'maxloglines': str(self.maxloglines),
            'maxlogtimeout': str(self.maxlogtimeout)
        }
        if self.file:
            # A local file run bypasses the repository ('norep').
            arguments.update({'file': self.file})
            arguments.update({'norep': 'true'})

        self.command_line = conn.build_command('kitchen', arguments, self.task_params)
        output, err_count = self._run_command()

        if self.xcom_push_flag:
            self.xcom_push(context, key=XCOM_RETURN_KEY, value=output)
            self.xcom_push(context, key='err_count', value=err_count)
        return output
ATI-PRD-ANA03 32 | 9080 33 | pentaho-di 34 | admin 35 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 36 | 37 | 38 | 39 | N 40 | N 41 | 42 | 43 | ATI-STG-ANA03 44 | ATI-STG-ANA03 45 | 9080 46 | pentaho-di 47 | pentaho 48 | Encrypted 2be98afc86aa7f2b4ae17ba71d69dff8b 49 | 50 | 51 | 52 | N 53 | N 54 | 55 | 56 | DI Server 57 | localhost 58 | 9080 59 | pentaho-di 60 | joe 61 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 62 | 63 | 64 | 65 | Y 66 | N 67 | 68 | 69 | DI Server 5.0 70 | localhost 71 | 9080 72 | pentaho-di 73 | admin 74 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 75 | 76 | 77 | 78 | Y 79 | N 80 | 81 | 82 | local 83 | 127.0.0.1 84 | 9080 85 | 86 | cluster 87 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 88 | 89 | 90 | 91 | Y 92 | N 93 | 94 | 95 | Master 96 | localhost 97 | 8081 98 | 99 | cluster 100 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 101 | 102 | 103 | 104 | Y 105 | N 106 | 107 | 108 | remote 109 | 192.168.1.16 110 | 9080 111 | pentaho-di/kettle 112 | admin 113 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 114 | 115 | 116 | 117 | Y 118 | N 119 | 120 | 121 | Slave1 122 | localhost 123 | 8082 124 | 125 | cluster 126 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 127 | 128 | 129 | 130 | N 131 | N 132 | 133 | 134 | Slave2 135 | localhost 136 | 8083 137 | 138 | cluster 139 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 140 | 141 | 142 | 143 | N 144 | N 145 | 146 | 147 | Slave3 148 | localhost 149 | 8084 150 | 151 | cluster 152 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 153 | 154 | 155 | 156 | N 157 | N 158 | 159 | 160 | Slave4 161 | localhost 162 | 8085 163 | 164 | cluster 165 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 166 | 167 | 168 | 169 | N 170 | N 171 | 172 | 173 | Slave5 174 | localhost 175 | 8086 176 | 177 | cluster 178 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 179 | 180 | 181 | 182 | N 183 | N 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | ID_JOB 195 | Y 196 | ID_JOB 197 | 198 | 199 | CHANNEL_ID 200 | Y 201 | CHANNEL_ID 
202 | 203 | 204 | JOBNAME 205 | Y 206 | JOBNAME 207 | 208 | 209 | STATUS 210 | Y 211 | STATUS 212 | 213 | 214 | LINES_READ 215 | Y 216 | LINES_READ 217 | 218 | 219 | LINES_WRITTEN 220 | Y 221 | LINES_WRITTEN 222 | 223 | 224 | LINES_UPDATED 225 | Y 226 | LINES_UPDATED 227 | 228 | 229 | LINES_INPUT 230 | Y 231 | LINES_INPUT 232 | 233 | 234 | LINES_OUTPUT 235 | Y 236 | LINES_OUTPUT 237 | 238 | 239 | LINES_REJECTED 240 | Y 241 | LINES_REJECTED 242 | 243 | 244 | ERRORS 245 | Y 246 | ERRORS 247 | 248 | 249 | STARTDATE 250 | Y 251 | STARTDATE 252 | 253 | 254 | ENDDATE 255 | Y 256 | ENDDATE 257 | 258 | 259 | LOGDATE 260 | Y 261 | LOGDATE 262 | 263 | 264 | DEPDATE 265 | Y 266 | DEPDATE 267 | 268 | 269 | REPLAYDATE 270 | Y 271 | REPLAYDATE 272 | 273 | 274 | LOG_FIELD 275 | Y 276 | LOG_FIELD 277 | 278 | 279 | EXECUTING_SERVER 280 | N 281 | EXECUTING_SERVER 282 | 283 | 284 | EXECUTING_USER 285 | N 286 | EXECUTING_USER 287 | 288 | 289 | START_JOB_ENTRY 290 | N 291 | START_JOB_ENTRY 292 | 293 | 294 | CLIENT 295 | N 296 | CLIENT 297 | 298 | 299 | 300 | 301 | 302 |
303 | 304 | 305 | ID_BATCH 306 | Y 307 | ID_BATCH 308 | 309 | 310 | CHANNEL_ID 311 | Y 312 | CHANNEL_ID 313 | 314 | 315 | LOG_DATE 316 | Y 317 | LOG_DATE 318 | 319 | 320 | JOBNAME 321 | Y 322 | TRANSNAME 323 | 324 | 325 | JOBENTRYNAME 326 | Y 327 | STEPNAME 328 | 329 | 330 | LINES_READ 331 | Y 332 | LINES_READ 333 | 334 | 335 | LINES_WRITTEN 336 | Y 337 | LINES_WRITTEN 338 | 339 | 340 | LINES_UPDATED 341 | Y 342 | LINES_UPDATED 343 | 344 | 345 | LINES_INPUT 346 | Y 347 | LINES_INPUT 348 | 349 | 350 | LINES_OUTPUT 351 | Y 352 | LINES_OUTPUT 353 | 354 | 355 | LINES_REJECTED 356 | Y 357 | LINES_REJECTED 358 | 359 | 360 | ERRORS 361 | Y 362 | ERRORS 363 | 364 | 365 | RESULT 366 | Y 367 | RESULT 368 | 369 | 370 | NR_RESULT_ROWS 371 | Y 372 | NR_RESULT_ROWS 373 | 374 | 375 | NR_RESULT_FILES 376 | Y 377 | NR_RESULT_FILES 378 | 379 | 380 | LOG_FIELD 381 | N 382 | LOG_FIELD 383 | 384 | 385 | COPY_NR 386 | N 387 | COPY_NR 388 | 389 | 390 | 391 | 392 | 393 |
394 | 395 | 396 | ID_BATCH 397 | Y 398 | ID_BATCH 399 | 400 | 401 | CHANNEL_ID 402 | Y 403 | CHANNEL_ID 404 | 405 | 406 | LOG_DATE 407 | Y 408 | LOG_DATE 409 | 410 | 411 | LOGGING_OBJECT_TYPE 412 | Y 413 | LOGGING_OBJECT_TYPE 414 | 415 | 416 | OBJECT_NAME 417 | Y 418 | OBJECT_NAME 419 | 420 | 421 | OBJECT_COPY 422 | Y 423 | OBJECT_COPY 424 | 425 | 426 | REPOSITORY_DIRECTORY 427 | Y 428 | REPOSITORY_DIRECTORY 429 | 430 | 431 | FILENAME 432 | Y 433 | FILENAME 434 | 435 | 436 | OBJECT_ID 437 | Y 438 | OBJECT_ID 439 | 440 | 441 | OBJECT_REVISION 442 | Y 443 | OBJECT_REVISION 444 | 445 | 446 | PARENT_CHANNEL_ID 447 | Y 448 | PARENT_CHANNEL_ID 449 | 450 | 451 | ROOT_CHANNEL_ID 452 | Y 453 | ROOT_CHANNEL_ID 454 | 455 | 456 | N 457 | 458 | 459 | 460 | Start 461 | 462 | SPECIAL 463 | 464 | Y 465 | N 466 | N 467 | 0 468 | 0 469 | 60 470 | 12 471 | 0 472 | 1 473 | 1 474 | N 475 | Y 476 | 0 477 | 320 478 | 256 479 | 480 | 481 | 482 | Success 483 | 484 | SUCCESS 485 | 486 | N 487 | Y 488 | 0 489 | 592 490 | 256 491 | 492 | 493 | 494 | 495 | 496 | Start 497 | Success 498 | 0 499 | 0 500 | Y 501 | Y 502 | Y 503 | 504 | 505 | 506 | 507 | 508 | 509 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | # This Pylint rcfile contains a best-effort configuration to uphold the 2 | # best-practices and style described in the Google Python style guide: 3 | # https://google.github.io/styleguide/pyguide.html 4 | # 5 | # Its canonical open-source location is: 6 | # https://google.github.io/styleguide/pylintrc 7 | 8 | [MASTER] 9 | 10 | # Files or directories to be skipped. They should be base names, not paths. 11 | ignore=third_party 12 | 13 | # Files or directories matching the regex patterns are skipped. The regex 14 | # matches against base names, not paths. 15 | ignore-patterns= 16 | 17 | # Pickle collected data for later comparisons. 
18 | persistent=no 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | # Use multiple processes to speed up Pylint. 25 | jobs=4 26 | 27 | # Allow loading of arbitrary C extensions. Extensions are imported into the 28 | # active Python interpreter and may run arbitrary code. 29 | unsafe-load-any-extension=no 30 | 31 | 32 | [MESSAGES CONTROL] 33 | 34 | # Only show warnings with the listed confidence levels. Leave empty to show 35 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 36 | confidence= 37 | 38 | # Enable the message, report, category or checker with the given id(s). You can 39 | # either give multiple identifier separated by comma (,) or put this option 40 | # multiple time (only on the command line, not in the configuration file where 41 | # it should appear only once). See also the "--disable" option for examples. 42 | #enable= 43 | 44 | # Disable the message, report, category or checker with the given id(s). You 45 | # can either give multiple identifiers separated by comma (,) or put this 46 | # option multiple times (only on the command line, not in the configuration 47 | # file where it should appear only once).You can also use "--disable=all" to 48 | # disable everything first and then reenable specific checks. For example, if 49 | # you want to run only the similarities checker, you can use "--disable=all 50 | # --enable=similarities". 
If you want to run only the classes checker, but have 51 | # no Warning level messages displayed, use"--disable=all --enable=classes 52 | # --disable=W" 53 | disable=abstract-method, 54 | apply-builtin, 55 | arguments-differ, 56 | attribute-defined-outside-init, 57 | backtick, 58 | bad-option-value, 59 | basestring-builtin, 60 | buffer-builtin, 61 | c-extension-no-member, 62 | consider-using-enumerate, 63 | cmp-builtin, 64 | cmp-method, 65 | coerce-builtin, 66 | coerce-method, 67 | delslice-method, 68 | div-method, 69 | duplicate-code, 70 | eq-without-hash, 71 | execfile-builtin, 72 | file-builtin, 73 | filter-builtin-not-iterating, 74 | fixme, 75 | getslice-method, 76 | global-statement, 77 | hex-method, 78 | idiv-method, 79 | implicit-str-concat-in-sequence, 80 | import-error, 81 | import-self, 82 | import-star-module-level, 83 | inconsistent-return-statements, 84 | input-builtin, 85 | intern-builtin, 86 | invalid-str-codec, 87 | locally-disabled, 88 | long-builtin, 89 | long-suffix, 90 | map-builtin-not-iterating, 91 | misplaced-comparison-constant, 92 | missing-function-docstring, 93 | metaclass-assignment, 94 | next-method-called, 95 | next-method-defined, 96 | no-absolute-import, 97 | no-else-break, 98 | no-else-continue, 99 | no-else-raise, 100 | no-else-return, 101 | no-init, # added 102 | no-member, 103 | no-name-in-module, 104 | no-self-use, 105 | nonzero-method, 106 | oct-method, 107 | old-division, 108 | old-ne-operator, 109 | old-octal-literal, 110 | old-raise-syntax, 111 | parameter-unpacking, 112 | print-statement, 113 | raising-string, 114 | range-builtin-not-iterating, 115 | raw_input-builtin, 116 | rdiv-method, 117 | reduce-builtin, 118 | relative-import, 119 | reload-builtin, 120 | round-builtin, 121 | setslice-method, 122 | signature-differs, 123 | standarderror-builtin, 124 | suppressed-message, 125 | sys-max-int, 126 | too-few-public-methods, 127 | too-many-ancestors, 128 | too-many-arguments, 129 | too-many-boolean-expressions, 130 | 
too-many-branches, 131 | too-many-instance-attributes, 132 | too-many-locals, 133 | too-many-nested-blocks, 134 | too-many-public-methods, 135 | too-many-return-statements, 136 | too-many-statements, 137 | trailing-newlines, 138 | unichr-builtin, 139 | unicode-builtin, 140 | unnecessary-pass, 141 | unpacking-in-except, 142 | useless-else-on-loop, 143 | useless-object-inheritance, 144 | useless-suppression, 145 | using-cmp-argument, 146 | wrong-import-order, 147 | xrange-builtin, 148 | zip-builtin-not-iterating, 149 | pointless-statement, 150 | missing-module-docstring 151 | 152 | 153 | [REPORTS] 154 | 155 | # Set the output format. Available formats are text, parseable, colorized, msvs 156 | # (visual studio) and html. You can also give a reporter class, eg 157 | # mypackage.mymodule.MyReporterClass. 158 | output-format=text 159 | 160 | # Put messages in a separate file for each module / package specified on the 161 | # command line instead of printing them on stdout. Reports (if any) will be 162 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 163 | # and it will be removed in Pylint 2.0. 164 | files-output=no 165 | 166 | # Tells whether to display a full report or only the messages 167 | reports=no 168 | 169 | # Python expression which should return a note less than 10 (10 is the highest 170 | # note). You have access to the variables errors warning, statement which 171 | # respectively contain the number of errors / warnings messages and the total 172 | # number of statements analyzed. This is used by the global evaluation report 173 | # (RP0004). 174 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 175 | 176 | # Template used to display messages. This is a python new-style format string 177 | # used to format the message information. 
See doc for all details 178 | #msg-template= 179 | 180 | 181 | [BASIC] 182 | 183 | # Good variable names which should always be accepted, separated by a comma 184 | good-names=main,_ 185 | 186 | # Bad variable names which should always be refused, separated by a comma 187 | bad-names= 188 | 189 | # Colon-delimited sets of names that determine each other's naming style when 190 | # the name regexes allow several styles. 191 | name-group= 192 | 193 | # Include a hint for the correct naming format with invalid-name 194 | include-naming-hint=no 195 | 196 | # List of decorators that produce properties, such as abc.abstractproperty. Add 197 | # to this list to register other decorators that produce valid properties. 198 | property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl 199 | 200 | # Regular expression matching correct function names 201 | function-rgx=^(?:(?PsetUp|tearDown|setUpModule|tearDownModule)|(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ 202 | 203 | # Regular expression matching correct variable names 204 | variable-rgx=^[a-z][a-z0-9_]*$ 205 | 206 | # Regular expression matching correct constant names 207 | const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 208 | 209 | # Regular expression matching correct attribute names 210 | attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ 211 | 212 | # Regular expression matching correct argument names 213 | argument-rgx=^[a-z][a-z0-9_]*$ 214 | 215 | # Regular expression matching correct class attribute names 216 | class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 217 | 218 | # Regular expression matching correct inline iteration names 219 | inlinevar-rgx=^[a-z][a-z0-9_]*$ 220 | 221 | # Regular expression matching correct class names 222 | class-rgx=^_?[A-Z][a-zA-Z0-9]*$ 223 | 224 | # Regular expression matching correct module names 225 | 
module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ 226 | 227 | # Regular expression matching correct method names 228 | method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ 229 | 230 | # Regular expression which should only match function or class names that do 231 | # not require a docstring. 232 | no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ 233 | 234 | # Minimum line length for functions/classes that require docstrings, shorter 235 | # ones are exempt. 236 | docstring-min-length=10 237 | 238 | 239 | [TYPECHECK] 240 | 241 | # List of decorators that produce context managers, such as 242 | # contextlib.contextmanager. Add to this list to register other decorators that 243 | # produce valid context managers. 244 | contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager 245 | 246 | # Tells whether missing members accessed in mixin class should be ignored. A 247 | # mixin class is detected if its name ends with "mixin" (case insensitive). 248 | ignore-mixin-members=yes 249 | 250 | # List of module names for which member attributes should not be checked 251 | # (useful for modules/projects where namespaces are manipulated during runtime 252 | # and thus existing member attributes cannot be deduced by static analysis. It 253 | # supports qualified module names, as well as Unix pattern matching. 254 | ignored-modules= 255 | 256 | # List of class names for which member attributes should not be checked (useful 257 | # for classes with dynamically set attributes). This supports the use of 258 | # qualified names. 259 | ignored-classes=optparse.Values,thread._local,_thread._local 260 | 261 | # List of members which are set dynamically and missed by pylint inference 262 | # system, and so shouldn't trigger E1101 when accessed. Python regular 263 | # expressions are accepted. 
264 | generated-members= 265 | 266 | 267 | [FORMAT] 268 | 269 | # Maximum number of characters on a single line. 270 | max-line-length=100 271 | 272 | # TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt 273 | # lines made too long by directives to pytype. 274 | 275 | # Regexp for a line that is allowed to be longer than the limit. 276 | ignore-long-lines=(?x)( 277 | ^\s*(\#\ )??$| 278 | ^\s*(from\s+\S+\s+)?import\s+.+$) 279 | 280 | # Allow the body of an if to be on the same line as the test if there is no 281 | # else. 282 | single-line-if-stmt=yes 283 | 284 | # List of optional constructs for which whitespace checking is disabled. `dict- 285 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 286 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 287 | # `empty-line` allows space-only lines. 288 | no-space-check= 289 | 290 | # Maximum number of lines in a module 291 | max-module-lines=99999 292 | 293 | # String used as indentation unit. The internal Google style guide mandates 2 294 | # spaces. Google's externaly-published style guide says 4, consistent with 295 | # PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google 296 | # projects (like TensorFlow). 297 | indent-string=' ' 298 | 299 | # Number of spaces of indent required inside a hanging or continued line. 300 | indent-after-paren=4 301 | 302 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 303 | expected-line-ending-format= 304 | 305 | 306 | [MISCELLANEOUS] 307 | 308 | # List of note tags to take in consideration, separated by a comma. 309 | notes=TODO 310 | 311 | 312 | [STRING] 313 | 314 | # This flag controls whether inconsistent-quotes generates a warning when the 315 | # character used as a quote delimiter is used inconsistently within a module. 
316 | check-quote-consistency=yes 317 | 318 | 319 | [VARIABLES] 320 | 321 | # Tells whether we should check for unused import in __init__ files. 322 | init-import=no 323 | 324 | # A regular expression matching the name of dummy variables (i.e. expectedly 325 | # not used). 326 | dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) 327 | 328 | # List of additional names supposed to be defined in builtins. Remember that 329 | # you should avoid to define new builtins when possible. 330 | additional-builtins= 331 | 332 | # List of strings which can identify a callback function by name. A callback 333 | # name must start or end with one of those strings. 334 | callbacks=cb_,_cb 335 | 336 | # List of qualified module names which can have objects that can redefine 337 | # builtins. 338 | redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools 339 | 340 | 341 | [LOGGING] 342 | 343 | # Logging modules to check that the string format arguments are in logging 344 | # function parameter format 345 | logging-modules=logging,absl.logging,tensorflow.io.logging 346 | 347 | 348 | [SIMILARITIES] 349 | 350 | # Minimum lines number of a similarity. 351 | min-similarity-lines=4 352 | 353 | # Ignore comments when computing similarities. 354 | ignore-comments=yes 355 | 356 | # Ignore docstrings when computing similarities. 357 | ignore-docstrings=yes 358 | 359 | # Ignore imports when computing similarities. 360 | ignore-imports=no 361 | 362 | 363 | [SPELLING] 364 | 365 | # Spelling dictionary name. Available dictionaries: none. To make it working 366 | # install python-enchant package. 367 | spelling-dict= 368 | 369 | # List of comma separated words that should not be checked. 370 | spelling-ignore-words= 371 | 372 | # A path to a file that contains private dictionary; one word per line. 
373 | spelling-private-dict-file= 374 | 375 | # Tells whether to store unknown words to indicated private dictionary in 376 | # --spelling-private-dict-file option instead of raising a message. 377 | spelling-store-unknown-words=no 378 | 379 | 380 | [IMPORTS] 381 | 382 | # Deprecated modules which should not be used, separated by a comma 383 | deprecated-modules=regsub, 384 | TERMIOS, 385 | Bastion, 386 | rexec, 387 | sets 388 | 389 | # Create a graph of every (i.e. internal and external) dependencies in the 390 | # given file (report RP0402 must not be disabled) 391 | import-graph= 392 | 393 | # Create a graph of external dependencies in the given file (report RP0402 must 394 | # not be disabled) 395 | ext-import-graph= 396 | 397 | # Create a graph of internal dependencies in the given file (report RP0402 must 398 | # not be disabled) 399 | int-import-graph= 400 | 401 | # Force import order to recognize a module as part of the standard 402 | # compatibility libraries. 403 | known-standard-library= 404 | 405 | # Force import order to recognize a module as part of a third party library. 406 | known-third-party=enchant, absl 407 | 408 | # Analyse import fallback blocks. This can be used to support both Python 2 and 409 | # 3 compatible code, which means that the block might have code that exists 410 | # only in one or another interpreter, leading to false positives when analysed. 411 | analyse-fallback-blocks=no 412 | 413 | 414 | [CLASSES] 415 | 416 | # List of method names used to declare (i.e. assign) instance attributes. 417 | defining-attr-methods=__init__, 418 | __new__, 419 | setUp 420 | 421 | # List of member names, which should be excluded from the protected access 422 | # warning. 423 | exclude-protected=_asdict, 424 | _fields, 425 | _replace, 426 | _source, 427 | _make 428 | 429 | # List of valid names for the first argument in a class method. 
430 | valid-classmethod-first-arg=cls, 431 | class_ 432 | 433 | # List of valid names for the first argument in a metaclass class method. 434 | valid-metaclass-classmethod-first-arg=mcs 435 | 436 | 437 | [EXCEPTIONS] 438 | 439 | # Exceptions that will emit a warning when being caught. Defaults to 440 | # "Exception" 441 | overgeneral-exceptions=StandardError, 442 | Exception, 443 | BaseException 444 | -------------------------------------------------------------------------------- /tests/assets/test_trans.ktr: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test_trans 5 | 6 | 7 | 8 | Normal 9 | /home 10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | ID_BATCH 22 | Y 23 | ID_BATCH 24 | 25 | 26 | CHANNEL_ID 27 | Y 28 | CHANNEL_ID 29 | 30 | 31 | TRANSNAME 32 | Y 33 | TRANSNAME 34 | 35 | 36 | STATUS 37 | Y 38 | STATUS 39 | 40 | 41 | LINES_READ 42 | Y 43 | LINES_READ 44 | 45 | 46 | 47 | LINES_WRITTEN 48 | Y 49 | LINES_WRITTEN 50 | 51 | 52 | 53 | LINES_UPDATED 54 | Y 55 | LINES_UPDATED 56 | 57 | 58 | 59 | LINES_INPUT 60 | Y 61 | LINES_INPUT 62 | 63 | 64 | 65 | LINES_OUTPUT 66 | Y 67 | LINES_OUTPUT 68 | 69 | 70 | 71 | LINES_REJECTED 72 | Y 73 | LINES_REJECTED 74 | 75 | 76 | 77 | ERRORS 78 | Y 79 | ERRORS 80 | 81 | 82 | STARTDATE 83 | Y 84 | STARTDATE 85 | 86 | 87 | ENDDATE 88 | Y 89 | ENDDATE 90 | 91 | 92 | LOGDATE 93 | Y 94 | LOGDATE 95 | 96 | 97 | DEPDATE 98 | Y 99 | DEPDATE 100 | 101 | 102 | REPLAYDATE 103 | Y 104 | REPLAYDATE 105 | 106 | 107 | LOG_FIELD 108 | Y 109 | LOG_FIELD 110 | 111 | 112 | EXECUTING_SERVER 113 | N 114 | EXECUTING_SERVER 115 | 116 | 117 | EXECUTING_USER 118 | N 119 | EXECUTING_USER 120 | 121 | 122 | CLIENT 123 | N 124 | CLIENT 125 | 126 | 127 | 128 | 129 | 130 |
131 | 132 | 133 | 134 | ID_BATCH 135 | Y 136 | ID_BATCH 137 | 138 | 139 | SEQ_NR 140 | Y 141 | SEQ_NR 142 | 143 | 144 | LOGDATE 145 | Y 146 | LOGDATE 147 | 148 | 149 | TRANSNAME 150 | Y 151 | TRANSNAME 152 | 153 | 154 | STEPNAME 155 | Y 156 | STEPNAME 157 | 158 | 159 | STEP_COPY 160 | Y 161 | STEP_COPY 162 | 163 | 164 | LINES_READ 165 | Y 166 | LINES_READ 167 | 168 | 169 | LINES_WRITTEN 170 | Y 171 | LINES_WRITTEN 172 | 173 | 174 | LINES_UPDATED 175 | Y 176 | LINES_UPDATED 177 | 178 | 179 | LINES_INPUT 180 | Y 181 | LINES_INPUT 182 | 183 | 184 | LINES_OUTPUT 185 | Y 186 | LINES_OUTPUT 187 | 188 | 189 | LINES_REJECTED 190 | Y 191 | LINES_REJECTED 192 | 193 | 194 | ERRORS 195 | Y 196 | ERRORS 197 | 198 | 199 | INPUT_BUFFER_ROWS 200 | Y 201 | INPUT_BUFFER_ROWS 202 | 203 | 204 | OUTPUT_BUFFER_ROWS 205 | Y 206 | OUTPUT_BUFFER_ROWS 207 | 208 | 209 | 210 | 211 | 212 |
213 | 214 | 215 | ID_BATCH 216 | Y 217 | ID_BATCH 218 | 219 | 220 | CHANNEL_ID 221 | Y 222 | CHANNEL_ID 223 | 224 | 225 | LOG_DATE 226 | Y 227 | LOG_DATE 228 | 229 | 230 | LOGGING_OBJECT_TYPE 231 | Y 232 | LOGGING_OBJECT_TYPE 233 | 234 | 235 | OBJECT_NAME 236 | Y 237 | OBJECT_NAME 238 | 239 | 240 | OBJECT_COPY 241 | Y 242 | OBJECT_COPY 243 | 244 | 245 | REPOSITORY_DIRECTORY 246 | Y 247 | REPOSITORY_DIRECTORY 248 | 249 | 250 | FILENAME 251 | Y 252 | FILENAME 253 | 254 | 255 | OBJECT_ID 256 | Y 257 | OBJECT_ID 258 | 259 | 260 | OBJECT_REVISION 261 | Y 262 | OBJECT_REVISION 263 | 264 | 265 | PARENT_CHANNEL_ID 266 | Y 267 | PARENT_CHANNEL_ID 268 | 269 | 270 | ROOT_CHANNEL_ID 271 | Y 272 | ROOT_CHANNEL_ID 273 | 274 | 275 | 276 | 277 | 278 |
279 | 280 | 281 | ID_BATCH 282 | Y 283 | ID_BATCH 284 | 285 | 286 | CHANNEL_ID 287 | Y 288 | CHANNEL_ID 289 | 290 | 291 | LOG_DATE 292 | Y 293 | LOG_DATE 294 | 295 | 296 | TRANSNAME 297 | Y 298 | TRANSNAME 299 | 300 | 301 | STEPNAME 302 | Y 303 | STEPNAME 304 | 305 | 306 | STEP_COPY 307 | Y 308 | STEP_COPY 309 | 310 | 311 | LINES_READ 312 | Y 313 | LINES_READ 314 | 315 | 316 | LINES_WRITTEN 317 | Y 318 | LINES_WRITTEN 319 | 320 | 321 | LINES_UPDATED 322 | Y 323 | LINES_UPDATED 324 | 325 | 326 | LINES_INPUT 327 | Y 328 | LINES_INPUT 329 | 330 | 331 | LINES_OUTPUT 332 | Y 333 | LINES_OUTPUT 334 | 335 | 336 | LINES_REJECTED 337 | Y 338 | LINES_REJECTED 339 | 340 | 341 | ERRORS 342 | Y 343 | ERRORS 344 | 345 | 346 | LOG_FIELD 347 | N 348 | LOG_FIELD 349 | 350 | 351 | 352 | 353 | 354 |
355 | 356 | 357 | ID_BATCH 358 | Y 359 | ID_BATCH 360 | 361 | 362 | CHANNEL_ID 363 | Y 364 | CHANNEL_ID 365 | 366 | 367 | LOG_DATE 368 | Y 369 | LOG_DATE 370 | 371 | 372 | METRICS_DATE 373 | Y 374 | METRICS_DATE 375 | 376 | 377 | METRICS_CODE 378 | Y 379 | METRICS_CODE 380 | 381 | 382 | METRICS_DESCRIPTION 383 | Y 384 | METRICS_DESCRIPTION 385 | 386 | 387 | METRICS_SUBJECT 388 | Y 389 | METRICS_SUBJECT 390 | 391 | 392 | METRICS_TYPE 393 | Y 394 | METRICS_TYPE 395 | 396 | 397 | METRICS_VALUE 398 | Y 399 | METRICS_VALUE 400 | 401 | 402 | 403 | 404 | 405 |
406 | 407 | 0.0 408 | 0.0 409 | 410 | 10000 411 | 50 412 | 50 413 | N 414 | Y 415 | 50000 416 | Y 417 | 418 | N 419 | 1000 420 | 100 421 | 422 | 423 | 424 | 425 | cluster 426 | N 427 | 428 | 429 | 430 | dynamic 431 | Y 432 | 1 433 | 434 | 435 | 436 | 437 | ati-dev-ana01 438 | ati-dev-ana01 439 | 9081 440 | pentaho-di 441 | njain 442 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 443 | 444 | 445 | 446 | Y 447 | N 448 | 449 | 450 | ATI-PRD-ANA03 451 | ATI-PRD-ANA03 452 | 9080 453 | pentaho-di 454 | admin 455 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 456 | 457 | 458 | 459 | N 460 | N 461 | 462 | 463 | ATI-STG-ANA03 464 | ATI-STG-ANA03 465 | 9080 466 | pentaho-di 467 | pentaho 468 | Encrypted 2be98afc86aa7f2b4ae17ba71d69dff8b 469 | 470 | 471 | 472 | N 473 | N 474 | 475 | 476 | DI Server 477 | localhost 478 | 9080 479 | pentaho-di 480 | joe 481 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 482 | 483 | 484 | 485 | Y 486 | N 487 | 488 | 489 | DI Server 5.0 490 | localhost 491 | 9080 492 | pentaho-di 493 | admin 494 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 495 | 496 | 497 | 498 | Y 499 | N 500 | 501 | 502 | local 503 | 127.0.0.1 504 | 9080 505 | 506 | cluster 507 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 508 | 509 | 510 | 511 | Y 512 | N 513 | 514 | 515 | Master 516 | localhost 517 | 8081 518 | 519 | cluster 520 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 521 | 522 | 523 | 524 | Y 525 | N 526 | 527 | 528 | remote 529 | 192.168.1.16 530 | 9080 531 | pentaho-di/kettle 532 | admin 533 | Encrypted 2be98afc86aa7f2e4bb18bd63c99dbdde 534 | 535 | 536 | 537 | Y 538 | N 539 | 540 | 541 | Slave1 542 | localhost 543 | 8082 544 | 545 | cluster 546 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 547 | 548 | 549 | 550 | N 551 | N 552 | 553 | 554 | Slave2 555 | localhost 556 | 8083 557 | 558 | cluster 559 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 560 | 561 | 562 | 563 | N 564 | N 565 | 566 | 567 | Slave3 568 | localhost 569 | 8084 570 | 571 | cluster 572 | Encrypted 
2be98afc86aa7f2e4cb1aa265cd86aac8 573 | 574 | 575 | 576 | N 577 | N 578 | 579 | 580 | Slave4 581 | localhost 582 | 8085 583 | 584 | cluster 585 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 586 | 587 | 588 | 589 | N 590 | N 591 | 592 | 593 | Slave5 594 | localhost 595 | 8086 596 | 597 | cluster 598 | Encrypted 2be98afc86aa7f2e4cb1aa265cd86aac8 599 | 600 | 601 | 602 | N 603 | N 604 | 605 | 606 | 607 | 608 | ClusterSchema 609 | 40000 610 | 2000 611 | 5000 612 | Y 613 | N 614 | 615 | Slave3 616 | Slave2 617 | Slave1 618 | Master 619 | 620 | 621 | 622 | ClusterSchema_dynamic 623 | 40000 624 | 2000 625 | 5000 626 | Y 627 | Y 628 | 629 | Master 630 | 631 | 632 | 633 | - 634 | 2020/03/25 09:51:52.318 635 | admin 636 | 2020/03/25 10:35:19.746 637 | 638 | N 639 | 640 | 641 | 642 | 643 | 644 | 645 | Dummy (do nothing) 646 | Dummy 647 | 648 | Y 649 | 650 | 1 651 | 652 | none 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 384 665 | 224 666 | Y 667 | 668 | 669 | 670 | 671 | 672 | 673 | N 674 | 675 | 676 | --------------------------------------------------------------------------------