├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md ├── ci │ ├── .pre-commit-config.yaml │ ├── .pylintrc │ └── .style.yapf └── workflows │ ├── automerge.yml │ ├── cherrypick.yml │ ├── ci.yml │ ├── ci_examples.yml │ ├── filter_examples.py │ ├── filter_projects.py │ ├── lint.yml │ ├── minor_release.yml │ ├── prepare_minor_release.py │ ├── release.yml │ └── update_main.py ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .style.yapf ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── RELEASE.md ├── examples ├── README.md ├── example_filter │ ├── data │ │ └── test_data.csv │ ├── filter_example_colab.ipynb │ └── filter_function.py ├── fraud_feast │ ├── README.md │ ├── feast_pipeline_local.py │ ├── repo │ │ ├── .gitignore │ │ ├── driver_repo.py │ │ └── feature_store.yaml │ └── requirements.txt ├── model_card_generator │ ├── .gitignore │ └── MLMD_Model_Card_Toolkit_Demo.ipynb ├── pandas_transform │ ├── README.md │ ├── pandas_transform_example.ipynb │ └── requirements.txt ├── sklearn_penguins │ ├── .gitignore │ └── README.md └── xgboost_penguins │ ├── README.md │ ├── __init__.py │ ├── data │ └── penguins_processed.csv │ ├── penguin_pipeline_local.py │ ├── penguin_pipeline_local_e2e_test.py │ ├── requirements.txt │ └── utils.py ├── proposals ├── 20210404-sklearn_example.md ├── 20210507-mlmd_client_lib.md ├── 20210525-examplefilter.md ├── 20210525-feast_example_gen.md ├── 20210526-model_load_test_component.md ├── 20210605-schema_curation_custom_component.md ├── 20210707-xgboost_evaluator.md ├── 20210721-sampling_component.md ├── 20210723-feature_selection_custom_component.md ├── 20210817-firebase_ml_publisher_component.md ├── 20211124-model_card_component.md ├── 20220117-exit-handler-slack.md ├── 20220118-upload_predictions_to_bigquery.md ├── 20220513-pandas_transform.md ├── 20220802-project_pytorch_example.md ├── 20220823-huggingface_model_pusher.md ├── 20230209-copy_example_gen.md ├── 20230328-airflow_orchestration.md ├── README.md └── yyyymmdd-project_template.md ├── pyproject.toml ├── setup.py └── tfx_addons ├── __init__.py ├── apache_airflow └── README.md ├── copy_example_gen ├── README.md ├── __init__.py ├── component.py └── component_test.py ├── example_filter ├── README.md ├── RELEASE.md ├── component.py ├── component_test.py ├── data │ └── test_data.csv └── filter_function.py ├── feast_examplegen ├── README.md ├── __init__.py ├── component.py ├── component_test.py ├── converters.py ├── executor.py └── executor_test.py ├── feature_selection ├── CONTRIBUTING.md ├── README.md ├── RELEASE.md ├── __init__.py ├── component.py ├── component_test.py ├── data │ └── data.csv ├── example │ ├── Iris_example_colab.ipynb │ ├── Palmer_Penguins_example_colab.ipynb │ ├── Pima_Indians_Diabetes_example_colab.ipynb │ └── modules │ │ ├── iris_module_file.py │ │ ├── penguins_module.py │ │ └── pima_indians_module_file.py ├── nb │ └── Example.ipynb ├── requirements.txt └── test │ └── iris.csv ├── firebase_publisher ├── README.md ├── RELEASE.md ├── __init__.py ├── component.py ├── component_test.py ├── executor.py ├── runner.py └── runner_test.py ├── huggingface_pusher ├── README.md ├── __init__.py ├── component.py ├── component_test.py ├── executor.py ├── executor_test.py ├── runner.py └── runner_test.py ├── message_exit_handler ├── README.md ├── RELEASE.md ├── __init__.py ├── component.py ├── component_test.py ├── constants.py └── message_providers │ ├── __init__.py │ ├── base_provider.py │ ├── base_provider_test.py │ ├── logging_provider.py │ ├── 
logging_provider_test.py │ ├── slack_provider.py │ └── slack_provider_test.py ├── mlmd_client ├── README.md ├── RELEASE.md ├── __init__.py ├── client.py └── client_test.py ├── model_card_generator ├── README.md ├── RELEASE.md ├── __init__.py ├── artifact.py ├── artifact_test.py ├── component.py ├── component_test.py ├── executor.py ├── executor_test.py └── tfxtest.py ├── pandas_transform ├── README.md ├── RELEASE.md ├── __init__.py ├── component.py ├── component_test.py └── null_preprocessing.py ├── predictions_to_bigquery ├── __init__.py ├── component.py ├── executor.py ├── executor_test.py ├── test_component.py └── utils.py ├── sampling ├── README.md ├── __init__.py ├── component.py ├── component_test.py ├── data │ ├── example_gen │ │ ├── Split-eval │ │ │ └── data_tfrecord-00000-of-00001.gz │ │ └── Split-train │ │ │ └── data_tfrecord-00000-of-00001.gz │ └── test_data.csv ├── example │ ├── __init__.py │ ├── data │ │ └── credit_fraud.csv │ ├── local_notebook.ipynb │ ├── sampler_pipeline_local.py │ └── sampler_utils.py ├── executor.py ├── executor_test.py └── spec.py ├── schema_curation ├── CONTRIBUTING.md ├── README.md ├── RELEASE.md ├── __init__.py ├── component │ ├── __init__.py │ ├── component.py │ ├── component_test.py │ ├── executor.py │ └── executor_test.py ├── example │ ├── __init__.py │ ├── module_file.py │ ├── taxi_example_colab.ipynb │ └── taxi_example_local.py └── test_data │ ├── module_file │ └── module_file.py │ └── schema_gen │ ├── __init__.py │ └── schema.pbtxt ├── utils ├── __init__.py ├── test_utils.py └── test_utils_tests.py ├── version.py └── xgboost_evaluator ├── README.md ├── RELEASE.md ├── __init__.py ├── component.py ├── data └── penguins_processed.csv ├── xgboost_predict_extractor.py └── xgboost_predict_extractor_test.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Expected Behavior 2 | 3 | 4 | ## Actual Behavior 5 | 6 | 7 | ## Steps to Reproduce the Problem 8 | 9 | 1. 10 | 1. 11 | 1. 12 | 13 | ## Specifications 14 | 15 | - Version: 16 | - Platform: -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes # 2 | 3 | > It's a good idea to open an issue first for discussion. 
4 | 5 | - [ ] Tests pass 6 | - [ ] Appropriate changes to README are included in PR -------------------------------------------------------------------------------- /.github/ci/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | default_stages: [push,commit] 4 | repos: 5 | - repo: https://github.com/pre-commit/mirrors-yapf 6 | rev: v0.31.0 7 | hooks: 8 | - id: yapf 9 | - repo: https://github.com/pycqa/isort 10 | rev: 5.11.5 11 | hooks: 12 | - id: isort 13 | name: isort (python) 14 | - repo: https://github.com/PyCQA/pylint 15 | rev: v2.8.3 16 | hooks: 17 | - id: pylint -------------------------------------------------------------------------------- /.github/ci/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style=pep8 3 | indent_width=2 4 | ALLOW_MULTILINE_DICTIONARY_KEYS=True 5 | -------------------------------------------------------------------------------- /.github/workflows/automerge.yml: -------------------------------------------------------------------------------- 1 | name: Automatic merging 2 | on: 3 | pull_request_target: { types: [opened, synchronize] } 4 | issue_comment: { types: [created] } 5 | 6 | jobs: 7 | automerge: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Run Codeowners merge check 12 | uses: casassg/auto-merge-bot@v0.3 13 | env: 14 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 15 | with: 16 | merge_method: 'squash' 17 | assign_reviewer: 'false' -------------------------------------------------------------------------------- /.github/workflows/cherrypick.yml: -------------------------------------------------------------------------------- 1 | name: Cherry pick 2 | on: 3 | issue_comment: 4 | types: [created] 5 | jobs: 6 | cherry-pick: 7 | name: Cherry Pick 8 | # Only cherry pick if user is a release manager 9 | # NB(gcasassaez): We unfortunately have to use fromJSON as GitHub doesn't have a way to specify constant arrays 10 | # See: https://github.community/t/passing-an-array-literal-to-contains-function-causes-syntax-error/17213/3 11 | if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/cherry-pick') && contains(fromJson('["casassg", "hanneshapke"]'), github.event.sender.login) 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout the latest code 15 | uses: actions/checkout@v2 16 | with: 17 | token: ${{ secrets.GITHUB_TOKEN }} 18 | fetch-depth: 0 # otherwise, you will fail to push refs to dest repo 19 | - name: Automatic Cherry Pick 20 | uses: vendoo/gha-cherry-pick@v1 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'tfx_addons/**' 7 | - '.github/workflows/ci.yml' 8 | - '.github/workflows/filter_projects.py' 9 | - 'setup.py' 10 | - 'pyproject.toml' 11 | - '.github/ci/deps/**' 12 | branches: 13 | - main 14 | - r* 15 | pull_request: 16 | paths: 17 | - 'tfx_addons/**' 18 | - '.github/workflows/ci.yml' 19 | - '.github/workflows/filter_projects.py' 20 | - 'setup.py' 21 | - 'pyproject.toml' 22 | - '.github/ci/deps/**' 23 | branches: 24 | - main 25 | - r* 26 | 27 | concurrency: 28 | group: ${{ github.workflow 
}}-${{ github.ref }} 29 | cancel-in-progress: true 30 | 31 | jobs: 32 | filter_projects: 33 | # Dynamic matrix trick inspired by https://www.cynkra.com/blog/2020-12-23-dynamic-gha/ 34 | runs-on: ubuntu-latest 35 | timeout-minutes: 60 36 | outputs: 37 | projects: ${{ steps.set-matrix.outputs.projects }} 38 | steps: 39 | - uses: actions/checkout@v2 40 | - name: Set up Python 3.7 41 | uses: actions/setup-python@v2 42 | with: 43 | python-version: 3.7 44 | - name: Get Changed Files 45 | id: changed_files 46 | uses: trilom/file-changes-action@v1.2.4 47 | with: 48 | output: json 49 | - name: Filter projects 50 | id: set-matrix 51 | run: | 52 | echo "projects=$(python ./.github/workflows/filter_projects.py $HOME/files.json)" >> $GITHUB_OUTPUT 53 | 54 | ci: 55 | runs-on: ubuntu-latest 56 | needs: filter_projects 57 | timeout-minutes: 60 58 | if: needs.filter_projects.outputs.projects != '[]' 59 | strategy: 60 | # Test for each project in parallel using ci_max and ci_min to ensure 61 | # tested in range of tfx/tensorflow supported versions 62 | matrix: 63 | project: ${{fromJson(needs.filter_projects.outputs.projects)}} 64 | depconstraint: 65 | - ci_max 66 | - ci_min 67 | steps: 68 | - uses: actions/checkout@v2 69 | - name: Set up Python 3.7 70 | uses: actions/setup-python@v2 71 | with: 72 | python-version: 3.7 73 | - name: Cache python environment 74 | uses: actions/cache@v2 75 | with: 76 | # Cache pip 77 | path: ~/.cache/pip 78 | # Look to see if there is a cache hit for the corresponding setup.py + TFX version 79 | key: ${{ runner.os }}-pip-${{ matrix.depconstraint }}-${{ hashFiles('tfx_addons/version.py') }} 80 | restore-keys: | 81 | ${{ runner.os }}-pip-${{ matrix.depconstraint }} 82 | - name: Install dependencies 83 | run: | 84 | python -m pip install --upgrade pip wheel 85 | python -m pip install -e ".[${{ matrix.project }}, ${{ matrix.depconstraint }}, test]" 86 | - name: Run tests 87 | run: pytest tfx_addons/${{ matrix.project }} 88 | -------------------------------------------------------------------------------- /.github/workflows/ci_examples.yml: -------------------------------------------------------------------------------- 1 | name: Examples CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'tfx_addons/**' 7 | - 'examples/**' 8 | - '.github/workflows/ci_examples.yml' 9 | - '.github/workflows/filter_examples.py' 10 | - 'setup.py' 11 | - 'pyproject.toml' 12 | branches: 13 | - main 14 | - r* 15 | pull_request: 16 | paths: 17 | - 'tfx_addons/**' 18 | - 'examples/**' 19 | - '.github/workflows/ci_examples.yml' 20 | - '.github/workflows/filter_examples.py' 21 | - 'setup.py' 22 | - 'pyproject.toml' 23 | branches: 24 | - main 25 | - r* 26 | 27 | concurrency: 28 | group: ${{ github.workflow }}-${{ github.ref }} 29 | cancel-in-progress: true 30 | 31 | jobs: 32 | filter_examples: 33 | # Dynamic matrix trick inspired by https://www.cynkra.com/blog/2020-12-23-dynamic-gha/ 34 | runs-on: ubuntu-latest 35 | timeout-minutes: 60 36 | outputs: 37 | projects: ${{ steps.set-matrix.outputs.projects }} 38 | steps: 39 | - uses: actions/checkout@v2 40 | - name: Set up Python 3.7 41 | uses: actions/setup-python@v2 42 | with: 43 | python-version: 3.7 44 | - name: Get Changed Files 45 | id: changed_files 46 | uses: trilom/file-changes-action@v1.2.4 47 | with: 48 | output: json 49 | - name: Filter example projects 50 | id: set-matrix 51 | run: | 52 | echo "projects=$(python ./.github/workflows/filter_examples.py $HOME/files.json)" >> $GITHUB_OUTPUT 53 | ci-examples: 54 | runs-on: ubuntu-latest 55 | needs: filter_examples
56 | timeout-minutes: 60 57 | if: needs.filter_examples.outputs.projects != '[]' 58 | strategy: 59 | # Test for each project in parallel using ci_max and ci_min to ensure 60 | # tested in range of tfx/tensorflow supported versions 61 | matrix: 62 | project: ${{fromJson(needs.filter_examples.outputs.projects)}} 63 | steps: 64 | - uses: actions/checkout@v2 65 | - name: Set up Python 3.7 66 | uses: actions/setup-python@v2 67 | with: 68 | python-version: 3.7 69 | - name: Cache python environment 70 | uses: actions/cache@v2 71 | with: 72 | # Cache installed dependencies 73 | path: ~/.cache/pip 74 | # Look to see if there is a cache hit for the corresponding requirement.txt + project name 75 | key: ${{ runner.os }}-pip-ciexamples-${{ matrix.project }}-${{ hashFiles(format('examples/{0}/requirements.txt', matrix.project)) }} 76 | restore-keys: | 77 | ${{ runner.os }}-pip-ciexamples-${{ matrix.project }} 78 | - name: Install dependencies 79 | run: | 80 | python -m pip install --upgrade pip wheel pytest 81 | cd examples/${{ matrix.project }} 82 | pip install -r requirements.txt 83 | - name: Run tests 84 | run: | 85 | cd examples/${{ matrix.project }} 86 | python -m pytest . 87 | 88 | -------------------------------------------------------------------------------- /.github/workflows/filter_projects.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Internal script to parse changed files and potential pkgs and returns the overlap""" 16 | 17 | import argparse 18 | import json 19 | import logging 20 | import os 21 | from typing import List 22 | 23 | logging.getLogger().setLevel(logging.INFO) 24 | 25 | # NB(casassg): Files that if changed should trigger running CI for all projects. 
26 | # These are files which are core and we want to avoid causing outages 27 | # because of changes to them 28 | RUN_ALL_FILES = [ 29 | "tfx_addons/version.py", "setup.py", ".github/workflows/ci.yml", 30 | "pyproject.toml" 31 | ] 32 | 33 | # Get event that triggered workflow 34 | # See: https://docs.github.com/en/actions/learn-github-actions/environment-variables#default-environment-variables 35 | GH_EVENT_NAME = os.environ.get("GITHUB_EVENT_NAME", "unknown") 36 | 37 | 38 | def _get_testable_projects() -> List[str]: 39 | """Get _PKG_METADATA from version.py which contains what projects are active 40 | """ 41 | context = {} 42 | base_dir = os.path.dirname( 43 | os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 44 | with open(os.path.join(base_dir, "tfx_addons", "version.py")) as fp: 45 | exec(fp.read(), context) # pylint: disable=exec-used 46 | 47 | return list(context["_PKG_METADATA"].keys()) 48 | 49 | 50 | def get_affected_projects(affected_files: List[str]) -> List[str]: 51 | """Given a list of affected files, and projects that can be tested, 52 | find which projects CI should run""" 53 | 54 | logging.info("Found affected files: %s", affected_files) 55 | testable_projects = _get_testable_projects() 56 | if GH_EVENT_NAME == "push": 57 | logging.info("GitHub Action trigger is %s, running all projects", 58 | GH_EVENT_NAME) 59 | return testable_projects 60 | else: 61 | logging.info("GitHub Action trigger is %s, filtering projects", 62 | GH_EVENT_NAME) 63 | for run_all_file in RUN_ALL_FILES: 64 | if run_all_file in affected_files: 65 | logging.warning("Found change in %s, running all projects", run_all_file) 66 | return testable_projects 67 | projects_to_test = set() 68 | for file in affected_files: 69 | if file.startswith("tfx_addons"): 70 | file_component = file.replace("tfx_addons/", "").split("/", 71 | maxsplit=1)[0] 72 | if file_component in testable_projects: 73 | logging.info("Package %s is marked for testing", file_component) 74 | projects_to_test.add(file_component) 75 | else: 76 | logging.warning( 77 | "Package %s is not in the _PKG_METADATA variable in version.py", 78 | file_component) 79 | return list(projects_to_test) 80 | 81 | 82 | if __name__ == "__main__": 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument("file_manifest") 85 | 86 | args = parser.parse_args() 87 | 88 | with open(args.file_manifest, "r") as f: 89 | affected_components = get_affected_projects(json.load(f)) 90 | print(json.dumps(affected_components)) 91 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | paths: 6 | - '**.py' 7 | - '.github/ci/**' 8 | - '.github/workflows/lint.yml' 9 | branches: 10 | - main 11 | - r* 12 | pull_request: 13 | paths: 14 | - '**.py' 15 | - '.github/ci/**' 16 | - '.github/workflows/lint.yml' 17 | branches: 18 | - main 19 | - r* 20 | 21 | jobs: 22 | pre-commit-checks: 23 | runs-on: ubuntu-latest 24 | timeout-minutes: 60 25 | steps: 26 | - uses: actions/checkout@v2 27 | - uses: pre-commit/action@v2.0.3 28 | name: Run pre-commit checks (pylint/yapf/isort) 29 | env: 30 | SKIP: insert-license 31 | with: 32 | extra_args: --hook-stage push --all-files 33 | -------------------------------------------------------------------------------- /.github/workflows/minor_release.yml: -------------------------------------------------------------------------------- 1 | name: Create Minor Release 2 | on: 3 |
workflow_dispatch: 4 | 5 | jobs: 6 | createrelease: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Check out code 11 | uses: actions/checkout@v2 12 | - name: Set minor version 13 | id: set-version 14 | run: | 15 | echo "::set-output name=version::$(python ./.github/workflows/prepare_minor_release.py)" 16 | - name: Create release branch 17 | run: git checkout -b r${{ steps.set-version.outputs.version }} 18 | - name: Initialize mandatory git config 19 | run: | 20 | git config user.name "GitHub Actions" 21 | git config user.email noreply@github.com 22 | - name: Commit changelog and manifest files 23 | id: make-commit 24 | run: | 25 | git add tfx_addons/version.py 26 | git commit --message "Prepare release ${{ steps.set-version.outputs.version }}" 27 | echo "::set-output name=commit::$(git rev-parse HEAD)" 28 | - name: Push new branch 29 | run: git push origin r${{ steps.set-version.outputs.version }} 30 | - uses: ncipollo/release-action@v1 31 | with: 32 | name: v${{ steps.set-version.outputs.version }}.0rc0 33 | commit: ${{ steps.make-commit.outputs.commit }} 34 | prerelease: true 35 | draft: true 36 | generateReleaseNotes: true 37 | skipIfReleaseExists: true 38 | tag: v${{ steps.set-version.outputs.version }}.0rc0 39 | - name: Update main 40 | id: update-main 41 | run: | 42 | git checkout main 43 | echo "::set-output name=new_version::$(python ./.github/workflows/update_main.py)" 44 | - name: Commit main change 45 | run: | 46 | git checkout -b ${{ github.triggering_actor }}/update-${{ steps.update-main.outputs.new_version }} 47 | git add tfx_addons/version.py 48 | git commit --message "Update main to ${{ steps.update-main.outputs.new_version }}" 49 | git push origin ${{ github.triggering_actor }}/update-${{ steps.update-main.outputs.new_version }} 50 | 51 | - name: Create pull request into main 52 | uses: thomaseizinger/create-pull-request@1.0.0 53 | with: 54 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 55 | head: ${{ github.triggering_actor }}/update-${{ steps.update-main.outputs.new_version }} 56 | base: main 57 | title: Update minor version to ${{ steps.update-main.outputs.new_version }} 58 | reviewers: ${{ github.triggering_actor }} 59 | body: | 60 | This is an automatic PR triggered by ${{ github.triggering_actor }} to prepare for ${{ steps.set-version.outputs.version }} release. 61 | 62 | Approve and merge in order to update main branch to ${{ steps.update-main.outputs.new_version }}. 63 | 64 | Check out [RELEASE.md](https://github.com/tensorflow/tfx-addons/blob/main/RELEASE.md) for more details. 65 | -------------------------------------------------------------------------------- /.github/workflows/prepare_minor_release.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Internal script to perform a minor release""" 16 | import logging 17 | import os 18 | import sys 19 | 20 | logging.getLogger().setLevel(logging.INFO) 21 | # Dynamically load root as module so that we can import version 22 | BASE_DIR = os.path.dirname( 23 | os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 24 | sys.path.append(BASE_DIR) 25 | 26 | import tfx_addons as tfxa # pylint: disable=wrong-import-position 27 | 28 | current_version = tfxa.__version__ 29 | major, minor, patch = current_version.split(".") 30 | 31 | with open(os.path.join(BASE_DIR, "tfx_addons", "version.py")) as f: 32 | lines = f.readlines() 33 | 34 | with open(os.path.join(BASE_DIR, "tfx_addons", "version.py"), "w") as f: 35 | for l in lines: 36 | if l.startswith("_VERSION_SUFFIX"): 37 | f.write('_VERSION_SUFFIX = "rc0"\n') 38 | else: 39 | f.write(l) 40 | 41 | print(".".join([major, minor])) 42 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release TFX Addons package to PyPI and TestPyPI 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'tfx_addons/**' 7 | - 'setup.py' 8 | - 'pyproject.toml' 9 | branches: 10 | - main 11 | - r* 12 | release: 13 | types: [published] 14 | tags: 15 | - v* 16 | 17 | jobs: 18 | build-and-publish: 19 | name: Build TFX Addons PyPI package and release to PyPI and TestPyPI 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python 3.7 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: 3.7 27 | - name: Install pypa/build 28 | run: python -m pip install build --user 29 | - name: Build a binary wheel and a source tarball 30 | run: python -m build --sdist --wheel --outdir dist/ . 31 | - name: Publish distribution TFX Addons package to Test PyPI 32 | uses: pypa/gh-action-pypi-publish@v1.5.0 33 | with: 34 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 35 | repository_url: https://test.pypi.org/legacy/ 36 | skip_existing: true 37 | - name: Publish distribution TFX Addons package to PyPI 38 | if: github.event_name == 'release' 39 | uses: pypa/gh-action-pypi-publish@v1.5.0 40 | with: 41 | password: ${{ secrets.PYPI_API_TOKEN }} 42 | - name: Upload files to a GitHub release 43 | uses: svenstaro/upload-release-action@2.2.1 44 | if: github.event_name == 'release' 45 | with: 46 | repo_token: ${{ secrets.GITHUB_TOKEN }} 47 | file: dist/* 48 | tag: ${{ github.ref }} 49 | overwrite: true 50 | file_glob: true 51 | -------------------------------------------------------------------------------- /.github/workflows/update_main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Internal script to perform a minor release""" 16 | import logging 17 | import os 18 | import sys 19 | 20 | logging.getLogger().setLevel(logging.INFO) 21 | # Dynamically load root as module so that we can import version 22 | BASE_DIR = os.path.dirname( 23 | os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 24 | sys.path.append(BASE_DIR) 25 | 26 | import tfx_addons as tfxa # pylint: disable=wrong-import-position 27 | 28 | current_version = tfxa.__version__ 29 | major, minor, patch = current_version.split(".") 30 | 31 | with open(os.path.join(BASE_DIR, "tfx_addons", "version.py")) as f: 32 | lines = f.readlines() 33 | 34 | with open(os.path.join(BASE_DIR, "tfx_addons", "version.py"), "w") as f: 35 | for l in lines: 36 | if l.startswith("_MINOR_VERSION"): 37 | next_minor = int(minor) + 1 38 | f.write(f'_MINOR_VERSION = "{next_minor}"\n') 39 | else: 40 | f.write(l) 41 | 42 | print(".".join([major, str(next_minor)])) 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # LINT.IfChange 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | **/*.py[cod] 5 | **/*$py.class 6 | 7 | # Mac folder attributes 8 | **/.DS_Store 9 | 10 | # C extensions 11 | **/*.so 12 | 13 | # Unit test 14 | .pytest_cache/ 15 | 16 | # Distribution / packaging 17 | .Python 18 | # build/ # build/ contains required files for building tfx packages. 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # Virtual environments 38 | .venv/* 39 | env/* 40 | **/env 41 | **/venv 42 | 43 | # pyenv 44 | .python-version 45 | 46 | # Editor 47 | .idea/* 48 | .vscode/* 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | .github/ci/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | .github/ci/.pylintrc -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | .github/ci/.style.yapf -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Global owners for SIG TFX-Addons 2 | * @rcrowe-google @theadactyl 3 | 4 | # SIG operations: proposals and contributions guidelines 5 | /proposals/ @rcrowe-google 6 | 7 | # SIG core members 8 | /CONTRIBUTING.md @rcrowe-google @casassg @hanneshapke @codesue @deutranium @wihanbooyse @BACtaki 9 | /README.md @casassg @hanneshapke @codesue @deutranium @wihanbooyse @BACtaki 10 | 11 | # PyPi distribution files 12 | /tfx_addons/__init__.py @casassg @hanneshapke @codesue @deutranium @wihanbooyse @BACtaki 13 | /tfx_addons/version.py @casassg @hanneshapke @codesue @deutranium @wihanbooyse @BACtaki 14 | /setup.py @casassg @hanneshapke @codesue @deutranium @wihanbooyse @BACtaki 15 | /pyproject.toml @casassg @hanneshapke @codesue @deutranium 
@wihanbooyse @BACtaki 16 | 17 | # CI/CD configuration (Release team) 18 | /.github/workflows/ @casassg @hanneshapke 19 | /.github/ci @casassg @hanneshapke 20 | /RELEASE.md @casassg @hanneshapke 21 | 22 | # Sci-Kit Learn Example using the Penguins dataset 23 | /examples/sklearn_penguins/ @TheMichaelHu @1025KB 24 | 25 | # MLMD Client Library 26 | /tfx_addons/mlmd_client/ @codesue @pselden @casassg 27 | 28 | # ExampleFilter Component 29 | /tfx_addons/example_filter/ @rclough 30 | 31 | # Schema Curation Component 32 | /tfx_addons/schema_curation/ @pratishtha-abrol @FatimahAdwan @deutranium @nirzu97 33 | 34 | # XGBoost Evaluator Component 35 | /tfx_addons/xgboost_evaluator @kindalime @cent5 @casassg 36 | /examples/xgboost_penguins @kindalime @cent5 @casassg 37 | 38 | # Sampling Component 39 | /tfx_addons/sampling @kindalime @cent5 @casassg 40 | 41 | # Feast ExampleGen Component 42 | /tfx_addons/feast_examplegen @BACtaki @casassg @wihanbooyse 43 | /examples/fraud_feast @BACtaki @casassg @wihanbooyse 44 | 45 | # Feature Selection Component 46 | /tfx_addons/feature_selection @nirzu97 @pratishtha-abrol @FatimahAdwan @deutranium 47 | 48 | # Firebase Publisher 49 | /tfx_addons/firebase_publisher @deep-diver @sayakpaul 50 | 51 | # HuggingFace Pusher 52 | /tfx_addons/huggingface_pusher @deep-diver @sayakpaul 53 | 54 | # Message Exit Handler 55 | /tfx_addons/message_exit_handler @hanneshapke 56 | /tfx_addons/utils @hanneshapke 57 | 58 | # Predictions to Bigquery Component 59 | /tfx_addons/predictions_to_bigquery @hanneshapke @cfezequiel 60 | 61 | # PandasTransform Component 62 | /tfx_addons/pandas_transform @rcrowe-google 63 | 64 | # Model Card Generator Component 65 | /tfx_addons/model_card_generator @codesue @hanneshapke 66 | /examples/model_card_generator @codesue @hanneshapke 67 | 68 | # Apache Airflow Orchestrator 69 | /tfx_addons/apache_airflow @lego0901 70 | 71 | # CopyExampleGen Component 72 | /tfx_addons/copy_example_gen @alxndrnh 73 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # SIG Addons Releases 2 | 3 | TFX Addons follows the [Semantic Versioning 2.0](https://semver.org/) strategy. 4 | 5 | * See the [Release Notes](https://github.com/tensorflow/tfx-addons/releases) for current and past releases. 6 | 7 | ## Minor automatic release from main 8 | 9 | 1. Trigger [Create Minor Release](https://github.com/tensorflow/tfx-addons/actions/workflows/minor_release.yml) workflow and ensure it runs to completion. 10 | 2. Find the created [draft release](https://github.com/tensorflow/tfx-addons/releases). 11 | * Add updates for new features, enhancements, bug fixes 12 | * Add contributors using `git shortlog ..HEAD -s` 13 | 3. Publish release. 14 | * Check PyPI to ensure release candidate has been released. 15 | * Send email to mailing list for vote. 16 | 4. Find the minor version PR created above and merge it. 17 | 18 | 19 | ## Major/Minor releases 20 | 21 | 1. Create new `rX.Y` branch on https://github.com/tensorflow/tfx-addons from `main`. 22 | 2. Update `version.py` in `rX.Y` branch. 23 | * Set the correct version and suffix in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py). 24 | * Ensure the proper minimum and maximum tested versions of TFX are set in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py).
25 | * Ensure proper supported python libraries are set in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py). 26 | 3. Create a [new release](https://github.com/tensorflow/tfx-addons/releases) from `rX.Y` branch. Create a tag with `vX.Y.Z` name. 27 | * Add updates for new features, enhancements, bug fixes 28 | * Add contributors using `git shortlog ..HEAD -s` 29 | 4. Create a new PR and merge an increase of `_MINOR_VERSION` number in `main` to get ready for next release. 30 | 31 | ## Patch releases 32 | 1. Cherry-pick commits to `rX.Y` branch. Release team can just port PR by commenting "/cherry-pick rX.Y" in a merged PR. 33 | 2. Create new PR with increasing `_PATCH_VERSION` in `version.py` against `rX.Y` branch. 34 | * Set the correct version and suffix in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py). 35 | * Ensure the proper minimum and maximum tested versions of TFX are set in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py). 36 | * Ensure proper supported python libraries are set in [version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py). 37 | 3. Create a [new release](https://github.com/tensorflow/tfx-addons/releases) from `rX.Y` branch. Create a tag with `vX.Y.Z` name. 38 | * Add updates for new features, enhancements, bug fixes 39 | * Add contributors using `git shortlog ..HEAD -s` 40 | 41 | 42 | 43 | ## SIG Addons Release Team 44 | 45 | Current Release Team: 46 | 47 | - Hannes Hapke - @hanneshapke 48 | - Gerard Casas Saez - @casassg 49 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains projects which provide examples for different 4 | use-cases or design approaches using TFX. -------------------------------------------------------------------------------- /examples/example_filter/data/test_data.csv: -------------------------------------------------------------------------------- 1 | label,col1 2 | ,2 3 | ,2 4 | ,2 5 | ,2 6 | ,2 7 | ,2 8 | ,2 9 | ,2 10 | ,2 11 | ,2 12 | 1,1 13 | 1,1 14 | 1,1 15 | 1,1 16 | 1,1 17 | 1,1 18 | 1,1 19 | 1,1 20 | 1,1 21 | 1,1 22 | 1,1 23 | 1,1 24 | 1,1 25 | 1,1 26 | 1,1 27 | 1,1 28 | 1,1 29 | 1,1 30 | 1,1 31 | 1,1 32 | 1,1 33 | 1,1 34 | 0,0 35 | 0,0 36 | 0,0 37 | 0,0 38 | 0,0 39 | 0,0 40 | 0,0 41 | 0,0 42 | 0,0 43 | 0,0 44 | 0,0 45 | 0,0 46 | 0,0 47 | 0,0 48 | 0,0 49 | 0,0 50 | 0,0 51 | 0,0 -------------------------------------------------------------------------------- /examples/example_filter/filter_function.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Example filter function that keeps only the examples whose label equals 0.""" 16 | 17 | 18 | def filter_function(x_list): 19 | """Keeps only the examples whose 'label' feature equals [0]. 20 | 21 | Args: 22 | x_list: List of examples (feature dictionaries) to filter. 23 | 24 | 25 | Returns: 26 | The filtered list of examples. 27 | 28 | """ 29 | new_list = [] 30 | for element in x_list: 31 | if element['label'] == [0]: 32 | new_list.append(element) 33 | return new_list 34 | -------------------------------------------------------------------------------- /examples/fraud_feast/README.md: -------------------------------------------------------------------------------- 1 | # Fraud Feast Example 2 | 3 | Expanded the [Feast Fraud tutorial](https://github.com/feast-dev/feast-fraud-tutorial/blob/4acf205dfbb3615d2f3e913adf1c28c5f2655f4c/notebooks/Fraud_Detection_Tutorial.ipynb) to use the TFX-Addons [FeastExampleGen](/tfx_addons/feast_examplegen/README.md) component. 4 | 5 | ## Instructions 6 | 7 | Clone the tfx-addons repo and navigate to the fraud_feast directory. 8 | 9 | ```
10 | git clone https://github.com/tensorflow/tfx-addons.git
11 | cd tfx-addons/examples/fraud_feast
12 | ```
13 | 14 | Next, create a Python virtual environment for this example, activate the 15 | environment, and install dependencies. Make sure you are using a version of 16 | python supported by TFX. 17 | 18 | ```
19 | python -m venv venv
20 | source ./venv/bin/activate
21 | pip install -r requirements.txt
22 | ```
23 | 24 | ### Local Example 25 | Initialize the Feast repository and run the local pipeline: 26 | 27 | ```
28 | cd repo && feast apply && cd ..
29 | python feast_pipeline_local.py
30 | ```
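
For a quick sanity check that the Feast repository is wired up correctly before running the TFX pipeline, you can also query it directly with the Feast SDK. The snippet below is an illustrative sketch only: the entity values and feature references are assumptions (the real columns live in the public BigQuery tables referenced by `repo/driver_repo.py`), and running it requires GCP credentials with access to that data.

```python
from datetime import datetime

import pandas as pd
from feast import FeatureStore

# Point the store at the repo/ directory in which `feast apply` was run.
store = FeatureStore(repo_path="repo")

# Hypothetical entity rows; real user_id values come from the tutorial's
# BigQuery tables.
entity_df = pd.DataFrame({
    "user_id": ["some-user-id"],
    "event_timestamp": [datetime(2021, 7, 1)],
})

# Feature references use the "<feature_view>:<feature>" form; the feature name
# below is a placeholder for whatever columns the BigQuery source exposes.
training_df = store.get_historical_features(
    entity_df, ["user_account_features:credit_score"]).to_df()
print(training_df.head())
```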
31 | -------------------------------------------------------------------------------- /examples/fraud_feast/repo/.gitignore: -------------------------------------------------------------------------------- 1 | data/*.db -------------------------------------------------------------------------------- /examples/fraud_feast/repo/driver_repo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Demo repository for credit card transations fraud dataset 16 | """ 17 | from datetime import timedelta 18 | 19 | from feast import BigQuerySource, Entity, FeatureView, ValueType 20 | 21 | # Add an entity for users 22 | user_entity = Entity( 23 | name="user_id", 24 | description= 25 | "A user that has executed a transaction or received a transaction", 26 | value_type=ValueType.STRING) 27 | 28 | # Add two FeatureViews based on existing tables in BigQuery 29 | user_account_fv = FeatureView( 30 | name="user_account_features", 31 | entities=["user_id"], 32 | ttl=timedelta(weeks=52), 33 | batch_source=BigQuerySource( 34 | table_ref="feast-oss.fraud_tutorial.user_account_features", 35 | event_timestamp_column="feature_timestamp")) 36 | 37 | user_has_fraudulent_transactions_fv = FeatureView( 38 | name="user_has_fraudulent_transactions", 39 | entities=["user_id"], 40 | ttl=timedelta(weeks=52), 41 | batch_source=BigQuerySource( 42 | table_ref="feast-oss.fraud_tutorial.user_has_fraudulent_transactions", 43 | event_timestamp_column="feature_timestamp")) 44 | -------------------------------------------------------------------------------- /examples/fraud_feast/repo/feature_store.yaml: -------------------------------------------------------------------------------- 1 | project: fraud_tutorial 2 | registry: ./data/registry.db 3 | provider: gcp 4 | online_store: 5 | type: sqlite -------------------------------------------------------------------------------- /examples/fraud_feast/requirements.txt: -------------------------------------------------------------------------------- 1 | ../..[feast_examplegen] -------------------------------------------------------------------------------- /examples/model_card_generator/.gitignore: -------------------------------------------------------------------------------- 1 | # unnecessary project files 2 | census_income_constants.py 3 | census_income_trainer.py 4 | census_income_transform.py 5 | -------------------------------------------------------------------------------- /examples/pandas_transform/README.md: -------------------------------------------------------------------------------- 1 | # PandasTransform 2 | ## TL;DR 3 | PandasTransform is a TFX component which can be used instead of the standard Transform component, and allows you to work with Pandas dataframes for your feature engineering. 
Processing is distributed using Beam for scalability. Operations which require a full pass over the dataset are not currently supported. Statistics such as the standard deviation, which are required for operations such as z-score normalization, are supplied using the statistics which are captured by StatisticsGen. 4 | 5 | ## This Example 6 | This example notebook shows how to use the PandasTransform component in a TFX pipeline. Notice in particular the way that StatisticsGen is used to create statistics for both the raw dataset and the transformed dataset. 7 | 8 | Note that although this example does use a TensorFlow model, since PandasTransform does not create a Transform graph the feature engineering which is done in PandasTransform will need to be applied separately during serving. 9 | 10 | ## Project Team 11 | Robert Crowe (rcrowe-google) robertcrowe--at--google--dot--com 12 | -------------------------------------------------------------------------------- /examples/pandas_transform/requirements.txt: -------------------------------------------------------------------------------- 1 | ../..[pandas_transform] 2 | -------------------------------------------------------------------------------- /examples/sklearn_penguins/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled 2 | __pycache__/ 3 | 4 | # Unit test / coverage reports 5 | htmlcov/ 6 | .tox/ 7 | .nox/ 8 | .coverage 9 | .coverage.* 10 | .cache 11 | nosetests.xml 12 | coverage.xml 13 | *.cover 14 | *.py,cover 15 | .hypothesis/ 16 | .pytest_cache/ 17 | cover/ 18 | 19 | # Jupyter Notebook 20 | .ipynb_checkpoints 21 | 22 | # Environments 23 | venv/ 24 | 25 | # Compiled KFP pipelines. 26 | *.tar.gz 27 | 28 | # TFX artifacts 29 | serving_model/ 30 | -------------------------------------------------------------------------------- /examples/sklearn_penguins/README.md: -------------------------------------------------------------------------------- 1 |
2 | **ARCHIVED**
3 | 4 | This example is archived because there is no currently active owner. It is also basically a duplicate of the [existing example in the main TFX repo](https://github.com/tensorflow/tfx/tree/master/tfx/examples/penguin/experimental), which is maintained by the TFX team. 5 | 6 | This example could be expanded to include a broader use of Scikit-Learn. If anyone is interested in expanding or working on this, you can check out the code in [v0.3.0 release](https://github.com/tensorflow/tfx-addons/tree/v0.3.0/examples/sklearn_penguins). 7 | 8 | Please contact the TFX-Addons maintainers to request ownership. 9 | 10 |
11 | 12 | # Penguin Classification Scikit-learn Example 13 | 14 | Expanded the [TFX penguin example 15 | pipeline](https://github.com/tensorflow/tfx/tree/master/tfx/examples/penguin) 16 | with instructions for using [scikit-learn](https://scikit-learn.org/stable/) 17 | to build and train the model. 18 | 19 | ## Instructions 20 | 21 | Clone the tfx-addons repo and navigate to the sklearn_penguins directory. 22 | 23 | ```
24 | git clone https://github.com/tensorflow/tfx-addons.git
25 | cd tfx-addons/examples/sklearn_penguins
26 | ```
27 | 28 | Next, create a Python virtual environment for this example, activate the 29 | environment, and install dependencies. Make sure you are using a version of 30 | python supported by TFX. 31 | 32 | ```
33 | python -m venv venv
34 | source ./venv/bin/activate
35 | pip install -r requirements.txt
36 | ```
37 | 38 | ### Local Example 39 | Execute the pipeline python file. Output can be found at `~/tfx`: 40 | 41 | ```
42 | python penguin_pipeline_sklearn_local.py
43 | ```
44 | 45 | ### GCP Example 46 | This example uses a custom container image instead of the default TFX ones found 47 | [here](gcr.io/tfx-oss-public/tfx). This custom container ensures the proper 48 | version of scikit-learn is installed. Run the following commands to build this 49 | image and upload it to Google Container Registry (GCR). 50 | 51 | ```
52 | cd ~/penguin/experimental
53 | gcloud auth configure-docker
54 | docker build \
55 |   --tag gcr.io/[PROJECT-ID]/tfx-example-sklearn \
56 |   --build-arg TFX_VERSION=$(python -c 'import tfx; print(tfx.__version__)') \
57 |   .
58 | docker push gcr.io/[PROJECT-ID]/tfx-example-sklearn
59 | ```
60 | 61 | Note that the custom container extends an official TFX container image based on 62 | the local TFX version. If an unreleased version of TFX is being used 63 | (e.g. installing from HEAD), `Dockerfile` may need to be modified to install the 64 | unreleased version. 65 | 66 | Set the project id and bucket in `penguin_pipeline_sklearn_gcp.py`. Then, run 67 | the following commands to copy the `~/penguin` directory to GCS and execute the 68 | pipeline python file. Output can be found at `[BUCKET]/tfx`. 69 | 70 | ```
71 | vi penguin_pipeline_sklearn_gcp.py
72 | gsutil -m cp -r ~/penguin/data/* gs://[BUCKET]/penguin/data/
73 | gsutil -m cp ~/penguin/experimental/\*.py gs://[BUCKET]/penguin/experimental/
74 | 
75 | tfx pipeline create \
76 |   --engine kubeflow \
77 |   --pipeline-path penguin_pipeline_sklearn_gcp.py \
78 |   --endpoint [MY-GCP-ENDPOINT.PIPELINES.GOOGLEUSERCONTENT.COM]
79 | ```
80 | 81 | Note that 82 | `gsutil -m cp ~/penguin/experimental/*.py gs://[BUCKET]/penguin/experimental` 83 | will need to be run every time updates are made to the GCP example. 84 | Additionally, subsequent pipeline deployments should use `tfx pipeline update` 85 | instead of `tfx pipeline create`. 86 | -------------------------------------------------------------------------------- /examples/xgboost_penguins/README.md: -------------------------------------------------------------------------------- 1 | # Penguin Classification XGBoost Example 2 | 3 | Expanded the [TFX penguin example 4 | pipeline](https://github.com/tensorflow/tfx/tree/master/tfx/examples/penguin) 5 | to use [xgboost](https://xgboost.readthedocs.io/en/latest/) 6 | to build and train the model. 7 | 8 | Also see [XGBoost Evaluator](/tfx_addons/xgboost_evaluator/README.md) for more 9 | context on how the trained model can be evaluated. 10 | 11 | ## Local Example 12 | Execute the pipeline python file. Output can be found at `~/tfx`: 13 | 14 | ``` 15 | python examples/xgboost_penguins/penguin_pipeline_local.py 16 | ``` 17 | 18 | ## Run e2e test 19 | 20 | ``` 21 | pip install -e ".[all,test]" 22 | pytest examples/xgboost_penguins 23 | ``` -------------------------------------------------------------------------------- /examples/xgboost_penguins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /examples/xgboost_penguins/penguin_pipeline_local_e2e_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """E2E Tests for penguin_pipeline_xgboost_local.""" 16 | 17 | import os 18 | from typing import Text 19 | 20 | import tensorflow as tf 21 | from tfx import v1 as tfx 22 | from tfx.orchestration import metadata 23 | 24 | from .
import penguin_pipeline_local 25 | 26 | 27 | class PenguinPipelineLocalEndToEndTest(tf.test.TestCase): 28 | def setUp(self): 29 | super().setUp() 30 | self._test_dir = os.path.join( 31 | os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), 32 | self._testMethodName) 33 | self._penguin_root = os.path.dirname(__file__) 34 | 35 | self._pipeline_name = 'xgboost_test' 36 | self._data_root = os.path.join(self._penguin_root, 'data') 37 | self._module_file = os.path.join(self._penguin_root, 'utils.py') 38 | self._serving_model_dir = os.path.join(self._test_dir, 'serving_model') 39 | self._pipeline_root = os.path.join(self._test_dir, 'tfx', 'pipelines', 40 | self._pipeline_name) 41 | self._metadata_path = os.path.join(self._test_dir, 'tfx', 'metadata', 42 | self._pipeline_name, 'metadata.db') 43 | 44 | def assertExecutedOnce(self, component: Text) -> None: 45 | """Check the component is executed exactly once.""" 46 | component_path = os.path.join(self._pipeline_root, component) 47 | self.assertTrue(tfx.dsl.io.fileio.exists(component_path)) 48 | execution_path = os.path.join(component_path, '.system', 49 | 'executor_execution') 50 | execution = tfx.dsl.io.fileio.listdir(execution_path) 51 | self.assertLen(execution, 1) 52 | 53 | def assertPipelineExecution(self) -> None: 54 | self.assertExecutedOnce('CsvExampleGen') 55 | self.assertExecutedOnce('ExampleValidator') 56 | self.assertExecutedOnce('SchemaGen') 57 | self.assertExecutedOnce('StatisticsGen') 58 | self.assertExecutedOnce('Trainer') 59 | 60 | def testPenguinPipelineLocal(self): 61 | tfx.orchestration.LocalDagRunner().run( 62 | penguin_pipeline_local.create_pipeline( 63 | pipeline_name=self._pipeline_name, 64 | pipeline_root=self._pipeline_root, 65 | data_root=self._data_root, 66 | module_file=self._module_file, 67 | metadata_path=self._metadata_path, 68 | beam_pipeline_args=[])) 69 | 70 | self.assertTrue(tfx.dsl.io.fileio.exists(self._metadata_path)) 71 | expected_execution_count = 6 72 | metadata_config = ( 73 | tfx.orchestration.metadata.sqlite_metadata_connection_config( 74 | self._metadata_path)) 75 | with metadata.Metadata(metadata_config) as m: 76 | artifact_count = len(m.store.get_artifacts()) 77 | execution_count = len(m.store.get_executions()) 78 | self.assertGreaterEqual(artifact_count, execution_count) 79 | self.assertEqual(expected_execution_count, execution_count) 80 | 81 | self.assertPipelineExecution() 82 | 83 | 84 | if __name__ == '__main__': 85 | tf.compat.v1.enable_v2_behavior() 86 | tf.test.main() 87 | -------------------------------------------------------------------------------- /examples/xgboost_penguins/requirements.txt: -------------------------------------------------------------------------------- 1 | ../..[xgboost_evaluator] 2 | xgboost>=1.0.0 -------------------------------------------------------------------------------- /proposals/20210404-sklearn_example.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | 3 | # Project Proposal 4 | 5 | **Your name:** Michael Hu 6 | 7 | **Your email:** humichael@google.com 8 | 9 | **Your company/organization:** Google 10 | 11 | **Project name:** Scikit-learn Penguin Classification 12 | 13 | ## Project Description 14 | Demonstrates training a scikit-learn MLPClassifier model in a TFX pipeline. The pipeline can either run locally or on GCP using CAIP, Dataflow, and Kubeflow Pipelines. 
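
To make the idea concrete, here is a minimal, hypothetical sketch of the core training step described in this proposal: features parsed from Example protos are converted to NumPy arrays, an `MLPClassifier` is trained, and the model is written out as a pickle so that a custom Evaluator module and CAIP serving can load it. This is not the example's actual trainer module; names and hyperparameters are illustrative.

```python
import pickle

import numpy as np
from sklearn.neural_network import MLPClassifier


def train_and_export(features: np.ndarray, labels: np.ndarray,
                     model_path: str) -> None:
  """Trains a small scikit-learn MLP and pickles it (illustrative only)."""
  model = MLPClassifier(hidden_layer_sizes=(8, 8), max_iter=500)
  model.fit(features, labels)
  # A pickled estimator is what both the custom Evaluator module and
  # CAIP's scikit-learn serving runtime expect to load.
  with open(model_path, "wb") as f:
    pickle.dump(model, f)
```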
15 | 16 | ## Project Category 17 | Example 18 | 19 | ## Project Use-Case(s) 20 | This example can be used to push any scikit-learn model to CAIP with minimal custom code to acquire standard TFX benefits like orchestration, data validation, gated retraining, etc. This example is currently not used within my organization. 21 | 22 | ## Project Implementation 23 | Scikit-learn will be integrated with TFX by using the following approach: 24 | Create a custom trainer module for training the scikit-learn model using example protos. 25 | 26 | Tensors parsed from examples will be converted to Numpy arrays. 27 | The model artifact will be stored as a pickle, which both the custom evaluator module and CAIP serving will be able to load. 28 | 29 | Create a custom evaluator module for making predictions against the model in Evaluator. 30 | 31 | Build a Docker container extending a TFX image for managing the scikit-learn version and dependencies when training on CAIP. This container will be hosted in the user's Google Container Registry on GCP. 32 | 33 | CAIP supports serving scikit-learn models out of the box. 34 | 35 | The project will not be packaged. Instead, users just need to clone the source code to run the example. 36 | 37 | ## Project Dependencies 38 | 'scikit-learn>=0.23,<0.24' 39 | kfp 40 | 41 | ## Project Team 42 | Michael Hu, humichael@google.com 43 | 44 | Jiayi Zhao, jyzhao@google.com 45 | 46 | -------------------------------------------------------------------------------- /proposals/20210507-mlmd_client_lib.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Gerard Casas Saez 5 | 6 | **Your email:** gerard@twitter.com 7 | 8 | **Your company/organization:** Twitter 9 | 10 | **Project name:** TFX MLMD Client Library 11 | 12 | ## Project Description 13 | 14 | Client library to inspect content in ML Metadata populated by TFX pipelines. Library will be written in Python and distributed through PyPi. 15 | Given metadata connection information, it should provide easy to use methods to introspect the Metadata DB. 16 | 17 | Idea from [#12](https://github.com/tensorflow/tfx-addons/issues/12) 18 | 19 | ## Project Category 20 | 21 | Client Library 22 | 23 | ## Project Use-Case(s) 24 | 25 | 26 | ML Metadata contains all the metadata for TFX pipelines (pipeline state, component execution, artifact lineage...). 27 | However currently to query pipeline information you need to write custom code every time, as there is no common library that provides an abstraction layer on top 28 | of the raw ML Metadata library. 29 | 30 | Several libraries have implemented their own implementation of this library as seen in [ModelCards](https://github.com/tensorflow/model-card-toolkit/blob/master/model_card_toolkit/utils/tfx_util.py), [NitroML](https://github.com/google/nitroml/tree/master/nitroml/analytics) 31 | or [Airflow example](https://github.com/tensorflow/tfx/blob/master/tfx/examples/airflow_workshop/notebooks/tfx_utils.py) in TFX repository. 32 | 33 | Twitter already has a small implementation of this library used to track pipeline state from interactive environments. 34 | 35 | Project will need close collaboration with TFX team to stabilize the context types ids used by TFX to track its jobs in ML Metadata. 36 | 37 | ## Project Implementation 38 | 39 | _Distribution:_ 40 | - Python library `tfx-addons-metadata-client` released to PyPi. 
(potentially `tfx-addons` if we want to include more projects in the future). 41 | - Automatic release and packaging using GitHub Actions. Versioning will depend on TFX stability for MLMD types. 42 | - Folder: `tfx/addons/metadata-client` (we will likely also need to create some .github files for automatic testing and automatic release). 43 | 44 | _Project implementation:_ 45 | 46 | - Python client library for ML Metadata, using ML Metadata Python SDK to query the database. 47 | - Main skeleton will be 3 model classes for Pipeline, PipelineRun and ComponentRun to introspect their status. 48 | - Artifact class methods to obtain artifacts generated by each ComponentRun, PipelineRun and ComponentRun (with optional filter by ArtifactType). 49 | - Lineage tracking for Artifact class: Obtain all artifacts that helped generate this Artifact, and check all downstream Artifacts generated by current artifact. 50 | 51 | To be heavily based on the existing libraries by NitroML, ModelCard (see above) and [tensorflow/tfx#2415](https://github.com/tensorflow/tfx/pull/2415). 52 | 53 | 54 | ## Project Dependencies 55 | `ml-metadata>=0.26` - Used to query the database. 56 | `ml-pipelines-sdk>=0.26` - This will be needed to pull the type names used by TFX on ML Metadata. 57 | 58 | ## Project Team 59 | Suzen Fylke, sue@twitter.com 60 | Vincent Nguyen, [[To be filled]] 61 | Paul Selden, paul.selden@openx.com 62 | [[TFX team member TBD]] 63 | -------------------------------------------------------------------------------- /proposals/20210525-examplefilter.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Ryan Clough 5 | 6 | **Your email:** rclough@spotify.com 7 | 8 | **Your company/organization:** Spotify 9 | 10 | **Project name:** Example Filter 11 | 12 | ## Project Description 13 | Beam based component that can filter Examples based on a user-defined predicate function. 14 | 15 | ## Project Category 16 | Choose 1: Component 17 | 18 | ## Project Use-Case(s) 19 | Data can be imported into TFX in a number of ways, and indeed, sometimes the dataset you wish to load is not under your direct 20 | control. In cases like these, it is useful to have a component that can filter your input data with simple rules. Ex: filter 21 | all records where `feature_a >= 1`. 22 | 23 | Our organization currently has a component for this purpose that is in active use. It is not as robust as it could be. 24 | 25 | It is also worth conidering that we may wish to try and promote this functionality to be included in the TFX core base ExampleGen, 26 | so that the filtering could be done within any ExampleGen based component. 27 | 28 | ## Project Implementation 29 | Spotify can provide the current implementation, which is based off of an old version of Tensorflow Transform. At a high level, use 30 | of the component looks like: 31 | 32 | ```python 33 | def predicate_fn(example) 34 | # Throw out Examples that used a credit card 35 | if b'Credit Card' in example['payment_type']: 36 | return False 37 | return True 38 | ... 39 | 40 | filtered_examples = ExampleFilter( 41 | examples=examples.output, 42 | schema=schema.output, 43 | module_file=filter_module, 44 | ) 45 | ``` 46 | 47 | ## Packaging 48 | 49 | Given that it's a Beam component, I think it will have to be a fully custom component. 50 | 51 | In terms of packaging and providing, we can provide the code, and a sample docker file and example pipeline for the component. 
52 | 53 | ## Future Considerations 54 | 55 | For the purposes of this proposal, the `ExampleFilter` component will be submitted as-is, as to not let "perfect" become the 56 | enemy of "good enough". There are a number of potential improvements that could be made to the component, but working 57 | through them should be a separate process from this initial proposal to get a working MVP. 58 | 59 | The current implementation is a bit dated and not so robust. It depends on a deprecated TFT proto coder, and only works on 60 | TF Records, as it does not make use of TFXIO. As part of bringing this to TFX-addons, I think it is worth iterating on the 61 | current design. Some initial ideas for change might be: 62 | 63 | * Implementing it more flexibly in TFXIO 64 | * Determine if there's a way to implement it without requiring a schema 65 | * Making the predicate_fn operate on true data types rather than bytes (see example above) 66 | * Adding an input that allows the user to specify splits (currently applies to all splits) 67 | 68 | ## Project Dependencies 69 | Current implementation uses a [proto decoder](https://github.com/tensorflow/transform/blob/v0.24.1/tensorflow_transform/coders/example_proto_coder.py#L329-L339) 70 | deprecated from TFX 0.25 onwards. Otherwise the project uses standard TFX dependencies. 71 | 72 | ## Project Team 73 | * Ryan Clough, rclough@spotify.com, @rclough 74 | * TBD 75 | -------------------------------------------------------------------------------- /proposals/20210605-schema_curation_custom_component.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Pratishtha Abrol 5 | 6 | **Your email:** pratishthaabrol@gmail.com 7 | 8 | **Your company/organization:** Outreachy 9 | 10 | **Project name:** [Schema curation custom component](https://github.com/tensorflow/tfx-addons/issues/8) 11 | 12 | ## Project Description 13 | This project applies Python user code from a user-supplied module file to a schema produced by SchemaGen, to curate the schema based on domain knowledge. 14 | 15 | ## Project Category 16 | Component 17 | 18 | ## Project Use-Case(s) 19 | This project will allow the user to add a custom component that modifies the schema generated by SchemaGen component according to user knowledge, for example, fixing domain limits that were inferred wrongly by the SchemaGen component. 20 | 21 | ## Project Implementation 22 | Implementation of the Schema Curation Custom Component can be done using the following approach: 23 | - Get the base Schema using SchemaGen component of TFX 24 | - User supplies a module file with a fully-custom component that defines the additions/changes to the initially generated schema through SchemaGen. 25 | - And execution script would run on the module file, which sets and modifies variables accordingly. 26 | - The base schema gets modified according to the module file and used further along the pipeline 27 | 28 | ## Project Dependencies 29 | The implementation will use the [TFDV library](https://www.tensorflow.org/tfx/data_validation/api_docs/python/tfdv) for validation and modification of schema objects according to the module file provided by the user. 
The following two methods would be of special focus: 30 | - [tfdv.set_domain](https://www.tensorflow.org/tfx/data_validation/api_docs/python/tfdv/set_domain) 31 | - [tfdv.write_schema_text](https://www.tensorflow.org/tfx/data_validation/api_docs/python/tfdv/write_schema_text) 32 | 33 | A similar implementation can be seen in the [Transform library](https://github.com/tensorflow/transform). Particularly, the [schema_utils](https://github.com/tensorflow/transform/blob/master/tensorflow_transform/tf_metadata/schema_utils.py) method could come in useful. 34 | 35 | ## Project Team 36 | **Project Leader** : Pratishtha Abrol, pratishtha-abrol, pratishthaabrol@gmail.com 37 | 1. Fatimah Adwan, FatimahAdwan, akilahafaf72@gmail.com 38 | 2. Kshitijaa Jaglan, deutranium, jaglan.kshitijaa2@gmail.com 39 | 3. Nirzari Gupta, nirzu97, nirzu97@gmail.com 40 | -------------------------------------------------------------------------------- /proposals/20210721-sampling_component.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | 3 | # Project Proposal 4 | 5 | ------ 6 | 7 | **Your name:** Daniel Kim 8 | 9 | **Your email:** danielk@twitter.com 10 | 11 | **Your company/organization:** Twitter 12 | 13 | **Project name:** Sampling Component 14 | 15 | ## Project Description 16 | 17 | This project will be a fully custom component that inputs an artifact in `tfRecord` format of `tf.Example`s and randomly undersamples or randomly oversamples it, reducing the data to the lowest- or highest-frequency class. It will primarily use an underlying Apache Beam pipeline that will be wrapped inside the TensorFlow component. 18 | 19 | ## Project Category 20 | 21 | Component 22 | 23 | ## Project Use-Case(s) 24 | 25 | As this project represents a very general operation used widely in machine learning data processing, we anticipate that it will have wide-ranging use cases, the most evident being in cases where dependent variable classes have wildly different relative frequencies and under/oversampling is needed to help effectively train a classifier. The potential impact will likely be large due to this, and our organization will likely utilize this project in the future. 26 | 27 | ## Project Solutions 28 | 29 | We considered multiple possible solutions and implementations for this project before deciding on an Apache Beam-based pipeline, including standard Python code and the utilization of a BigQuery query in order to perform the random under/oversampling task. Using a pure Python-based algorithm with `multiprocessing` will likely be inefficient for the purposes of a parallelizable computation such as this one, and utilizing solutions such as Dask would introduce unnecessary dependencies into our project. 30 | 31 | BigQuery is also a very good option, and a great fallback in case Apache Beam turns out to be infeasible for this project, but Apache Beam has better Python integration through custom `DoFn`s that may help us with our implementation of other algorithms later on. In this case, we would load the data in and out of a BigQuery table and perform our operations within this table. The component would then either utilize a schema generated from `SchemaGen` or infer one on its own, potentially adding an unneeded dependency into the component or performing unnecessary inference.
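To make the Beam option concrete, the core random-undersampling step could be prototyped with a handful of Beam transforms, as in the toy sketch below; it is deliberately simpler than the component described in the next section, which operates on serialized `tf.Example`s rather than `(label, value)` tuples.

```python
import random

import apache_beam as beam


def _undersample(keyed_records, target_count):
  """Randomly keeps at most `target_count` records for a single class key."""
  key, records = keyed_records
  records = list(records)
  random.shuffle(records)
  return [(key, record) for record in records[:target_count]]


with beam.Pipeline() as pipeline:
  _ = (pipeline
       | 'CreateToyData' >> beam.Create([('a', 1), ('a', 2), ('a', 3), ('b', 4)])
       | 'GroupByClass' >> beam.GroupByKey()
       | 'Undersample' >> beam.FlatMap(_undersample, target_count=1)
       | 'Print' >> beam.Map(print))
```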
32 | 33 | ## Project Implementation 34 | 35 | At a high level, the plan is to use Apache Beam to ingest a `tfRecord` of `tf.Examples`, shuffle them, convert them into a key-value `PCollection` with keys as class values and values as data points, and then perform the actual under/oversampling. Null values (and values that have key classes that are specified by the user) will not be part of the over/undersampling step; they will be separated and added back into the sampled dataset. The algorithm will be written as an Apache Beam pipeline, which will be wrapped into a TensorFlow custom component (with custom executor and spec) to use with TFX pipelines. The component would be written as inputting a `TFRecord` artifact of `tf.Examples` and exporting a similar `TFRecord` artifact, making its placement in a pipeline nearly ubiquitous. 36 | 37 | Later additions to the project could include the integration through Apache Beam of one or more other, more complex undersampling or ovesampling algorithms. Our likely focus would be SMOTE for oversampling and either ENN or Tomek Links for undersampling. These would likely be implemented as custom Python functions within the Apache Beam pipeline, although the focus for now is currently the initial random sampling component. 38 | 39 | ## Project Dependencies 40 | 41 | tensorflow, TFX, Apache Beam 42 | 43 | ## Project Team 44 | 45 | List the members of the project team. Include their names, Github user IDs, and email addresses. Identify project leaders. 46 | 47 | * Daniel Kim, kindalime, danielk@twitter.com -------------------------------------------------------------------------------- /proposals/20210723-feature_selection_custom_component.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Nirzari Gupta 5 | 6 | **Your email:** nirzu97@gmail.com 7 | 8 | **Your company/organization:** Outreachy 9 | 10 | **Project name:** [Feature selection custom component](https://github.com/tensorflow/tfx-addons/issues/7) 11 | 12 | ## Project Description 13 | This project provides a facility to perform various feature selection algorithms on datasets in TFX pipelines. Additionally, feature scores for selected features will also be generated as a custom artifact. 14 | 15 | ## Project Category 16 | Component 17 | 18 | ## Project Use-Case(s) 19 | This project will allow the user to select different algorithms for performing feature selection on datasets artifacts in TFX pipelines. 20 | 21 | ## Project Implementation 22 | Feature Selection Custom Component will be implemented as Python function-based component. 23 | Implementation of the Feature Selection Custom Component can be done using the following approach: 24 | - Get dataset artifact generated by ExampleGen 25 | - Convert it into the format compatible with Scikit-Learn functions 26 | - Perform univariate feature selection using parameters given by users 27 | - Remove not selected features from the dataset 28 | - Provide feature scores of the selected features as a custom artifact 29 | 30 | ## Project Dependencies 31 | The implementation will use the [Scikit-learn feature selection functions](https://scikit-learn.org/stable/modules/feature_selection.html) 32 | 33 | ## Project Team 34 | **Project Leader** : Nirzari Gupta, nirzu97, nirzu97@gmail.com 35 | 1. Fatimah Adwan, FatimahAdwan, akilahafaf72@gmail.com 36 | 2. Kshitijaa Jaglan, deutranium, jaglan.kshitijaa2@gmail.com 37 | 3. 
Pratishtha Abrol, pratishtha-abrol, pratishthaabrol@gmail.com 38 | -------------------------------------------------------------------------------- /proposals/20210817-firebase_ml_publisher_component.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Chansung Park 5 | 6 | **Your email:** deep.diver.csp@gmail.com 7 | 8 | **Your company/organization:** Individual(ML GDE) 9 | 10 | **Project name:** [Firebase ML Publisher](https://github.com/tensorflow/tfx-addons/issues/59) 11 | 12 | ## Project Description 13 | This project defines a custom TFX component to publish/update ML models to [Firebase ML](https://firebase.google.com/products/ml). This is another type of pusher component, and the input model is assumed to be in TFLite format. 14 | 15 | ## Project Category 16 | Component 17 | 18 | ## Project Use-Case(s) 19 | This project helps users to publish trained models directly to Firebase ML. 20 | 21 | With Firebase ML, we can guarantee that mobile devices can be equipped with the latest ML model without explicitly embedding the binary at the project compilation stage. We can even A/B test different versions of a model with Google Analytics when the model is published on Firebase ML. 22 | 23 | ## Project Implementation 24 | The Firebase ML Publisher component will be implemented as a Python function-based component. You can find the [actual source code](https://github.com/sayakpaul/Dual-Deployments-on-Vertex-AI/blob/main/custom_components/firebase_publisher.py) in my personal project. Please note this is a personal implementation, and it will be enhanced as an official TFX Addons component. 25 | 26 | The implementation details: 27 | - Define a custom Python function-based TFX component. It takes the following parameters from a previous component. 28 | - It should follow the standard Pusher's interface since this is another custom pusher. 29 | - Additionally, it takes meta information to manage the published model for Firebase ML, such as `display name` and `tags`. 30 | - Download the saved TFLite model file by referencing the output from a previous component. 31 | - The Firebase SDK doesn't allow publishing models from GCS directly. 32 | - Initialize Firebase Admin with the credential and the Firebase temporary-use GCS bucket. 33 | - Firebase credentials can be set up via [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) for GKE or the [Mounting Secret API in the TFX runner](https://github.com/tensorflow/tfx/blob/d989bbd7fc366c73ad833428ce6b5cf57a587432/tfx/orchestration/kubeflow/kubeflow_dag_runner.py#L78). 34 | - Search whether any model with the same `display name` has already been published. 35 | - if yes, update the existing Firebase ML model, then publish it 36 | - if no, create a new Firebase ML model, then publish it 37 | - Return `tfx.dsl.components.OutputDict` to indicate whether the job was successful, and whether the job created a new Firebase ML model or updated the existing Firebase ML model. 38 | 39 | ## Project Dependencies 40 | The implementation will use the following libraries. 41 | - [Firebase Admin Python SDK](https://github.com/firebase/firebase-admin-python) >= 5.0.2 42 | - [Python Client for Google Cloud Storage](https://github.com/googleapis/python-storage) >= 1.42.0 43 | 44 | ## Project Team 45 | **Project Leader** : Chansung Park, deep-diver, deep.diver.csp@gmail.com 46 | 1. 
Sayak Paul, sayakpaul, spsayakpaul@gmail.com 47 | -------------------------------------------------------------------------------- /proposals/20220117-exit-handler-slack.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal for Slack Exit Handler for TFX Pipelines 3 | 4 | **Your name:** Hannes Max Hapke 5 | 6 | **Your email:** hannes@digits.com 7 | 8 | **Your company/organization:** Digits Financial, Inc. 9 | 10 | **Project name:** Slack Exit Handler for TFX Pipelines 11 | 12 | ## Project Description 13 | 14 | The component provides an exit handler for TFX pipelines which notifies the user about the final state of the pipeline (failed or succeeded) via a Slack message. If the pipeline failed, the component will provide the error message. 15 | 16 | ## Project Category 17 | 18 | Component 19 | 20 | ## Project Use-Case(s) 21 | 22 | The exit handler notifies Digits' ML team about the final state of a pipeline. Instead of constantly polling the pipeline status via the Vertex CLI, the exit handler notifies us. 23 | 24 | The implementation can be extended to cover other communication services (e.g. SMS via Twilio) too. 25 | 26 | Furthermore, the implementation can be seen as an example implementation for an exit handler. Other users could use the same setup to trigger downstream pipelines or trigger other post-run actions. 27 | 28 | ## Project Implementation 29 | 30 | The existing implementation is Python-based and it uses the `tfx.orchestration.experimental.exit_handler` decorator. 31 | 32 | The component accepts 4 parameters: 33 | * final_status 34 | * slack_token 35 | * slack_channel_id 36 | * on_failure_only 37 | 38 | `final_status` is the JSON string of the pipeline status, provided by TFX. The Slack parameters contain the credentials to submit the message. And `on_failure_only` is a configuration for frequently run pipelines to only alert on failures. We have a number of pipelines where this option was useful. 39 | 40 | The component parses the status, and composes a message based on the content. 41 | 42 | ``` 43 | job_id = status["pipelineJobResourceName"].split("/")[-1] 44 | if status["state"] == "SUCCEEDED": 45 | message = f":tada: Pipeline job *{job_id}* completed successfully.\n" 46 | else: 47 | message = f":scream: Pipeline job *{job_id}* failed." 48 | message += f"\n>{status['error']['message']}" 49 | ``` 50 | 51 | A Slack web client object is then created and the message is submitted via the object. 52 | 53 | Overall, the implementation is minimal, but it serves as a great exit handler example. 54 | 55 | ### Current Digits Implementation 56 | 57 | #### Pipeline Success Message 58 | ![Screen_Shot_2022-01-05_at_3_23_43_PM_2](https://user-images.githubusercontent.com/1234819/148304418-9232fe68-57a3-4976-bd01-8d3e14bbf00b.png) 59 | 60 | #### Pipeline Failure Message 61 | ![_Screen_Shot_2022-01-05_at_2_45_47_PM](https://user-images.githubusercontent.com/1234819/148301546-b8ae19e3-ff71-4ec6-9969-06e71672b2e2.png) 62 | 63 | #### Visualization in Google Cloud Vertex Pipelines 64 | ![Screen_Shot_2022-01-05_at_3_28_06_PM_2](https://user-images.githubusercontent.com/1234819/148304482-22347d1f-fb9c-4744-92ef-1d020c79f2fc.png) 65 | 66 | 67 | ## Project Dependencies 68 | 69 | The component requires: 70 | * TFX version >= 1.4.0 71 | * Slack Python client 72 | 73 | The component will also require Google Cloud's Vertex pipelines as its orchestrator.
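For readers who want to adapt this pattern, a minimal sketch of such an exit handler is shown below. The `slack_sdk` client and the exact parameter wiring are assumptions based on the description above, not the Digits implementation.

```python
import json

from slack_sdk import WebClient
from tfx import v1 as tfx


@tfx.orchestration.experimental.exit_handler
def slack_exit_handler(final_status: tfx.dsl.components.Parameter[str],
                       slack_token: tfx.dsl.components.Parameter[str],
                       slack_channel_id: tfx.dsl.components.Parameter[str],
                       on_failure_only: tfx.dsl.components.Parameter[int] = 0):
  """Posts the final pipeline state to a Slack channel."""
  status = json.loads(final_status)
  job_id = status["pipelineJobResourceName"].split("/")[-1]

  if status["state"] == "SUCCEEDED":
    if on_failure_only:
      return  # Skip notifications for successful runs.
    message = f":tada: Pipeline job *{job_id}* completed successfully."
  else:
    message = f":scream: Pipeline job *{job_id}* failed."
    message += f"\n>{status['error']['message']}"

  WebClient(token=slack_token).chat_postMessage(channel=slack_channel_id,
                                                text=message)
```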
74 | 75 | ## Project Team 76 | 77 | * Hannes Hapke (@hanneshapke), hannes -at- digits.com 78 | 79 | # Note 80 | 81 | Please be aware of the processes and requirements which are outlined here: 82 | 83 | * [SIG-TFX-Addons](https://github.com/tensorflow/tfx-addons) 84 | * [Contributing Guidelines](https://github.com/tensorflow/tfx-addons/blob/main/CONTRIBUTING.md) 85 | * [TensorFlow Code of Conduct](https://github.com/tensorflow/tfx-addons/blob/main/CODE_OF_CONDUCT.md) 86 | -------------------------------------------------------------------------------- /proposals/20220118-upload_predictions_to_bigquery.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal for Upload Predictions to BigQuery 3 | 4 | **Your name:** Hannes Max Hapke 5 | 6 | **Your email:** hannes@digits.com 7 | 8 | **Your company/organization:** Digits Financial, Inc. 9 | 10 | **Project name:** Upload Predictions to BigQuery component 11 | 12 | ## Project Description 13 | 14 | The project addresses the project idea #78. The TFX `BulkInferrer` allows pipeline to apply an ML model (loaded or trained in the pipeline) and generates predictions for the provided inference data. 15 | 16 | This project will provide a component which receives the predictions from the `BulkInferrer` and writes the results to BigQuery. 17 | 18 | ## Project Category 19 | 20 | Component 21 | 22 | ## Project Use-Case(s) 23 | 24 | Such a component is useful for generating predictions within the pipeline or for two-step pipelines producing semi-supervised ML models. 25 | 26 | ## Project Implementation 27 | 28 | The existing implementation was written as a "traditional" TFX component with its `ComponentSpec`, `Executor`, etc. to run efficiently on Apache Beam. 29 | 30 | The implementation receives 3 artifacts: 31 | * transform_graph 32 | * inference_results 33 | * schema 34 | 35 | The `transform_graph` is used to convert classification probabilities to a label. The TFX `schema` is used to generate the BigQuery schema for the table inserts. And the `inference_results` contain the information provided from the upstream `BulkInferrer` component. 36 | 37 | In addition, the component accepts a number of parameters to customize the BigQuery inserts: 38 | * bq_table_name - Table name 39 | * filter_threshold - threshold to filter results with low confidence 40 | * table_suffix - suffix for daily inferences 41 | * table_partitioning - BQ partitioning setting for newly created tables 42 | * expiration_time_delta - BQ expiration time after which the table will expire 43 | 44 | The component processes the inference results, converts the class likelihoods into class labels, and then generates a tables schema from the TFX schema information, before it writes the information to Big Query. 45 | 46 | The writing to Big Query is done via Apache Beam. 
47 | 48 | ``` 49 | with self._make_beam_pipeline() as pipeline: 50 | _ = (pipeline 51 | | 'Read Prediction Log' >> beam.io.ReadFromTFRecord( 52 | prediction_log_path, 53 | coder=prediction_log_decoder) 54 | | 'Filter and Convert to Dict' >> beam.ParDo( 55 | FilterPredictionToDictFn( 56 | labels=labels, 57 | features=features, 58 | ts=ts, 59 | filter_threshold=exec_properties['filter_threshold'], 60 | ) 61 | ) 62 | | 'Write Dict to BQ' >> beam.io.gcp.bigquery.WriteToBigQuery( 63 | table=bq_table_name, 64 | schema=bq_schema, 65 | additional_bq_parameters=_ADDITIONAL_BQ_PARAMETERS, 66 | create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED, 67 | write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE) 68 | ) 69 | ``` 70 | 71 | After the completion of the datat insert, the component returns the `generated_bq_table_name` as a string artifact for downstream components. 72 | 73 | ## Project Dependencies 74 | 75 | The component requires: 76 | * TFX version >= 1.0.0 77 | * Apache Beam 78 | * TensorFlow Transform 79 | 80 | The component implicitly requires Google Cloud as a Dependency due to the writing operation to BigQuery. 81 | 82 | ## Project Team 83 | 84 | * Hannes Hapke (@hanneshapke), hannes -at- digits.com 85 | * Ukjae Jeong (@jeongukjae) 86 | 87 | # Note 88 | 89 | Please be aware of the processes and requirements which are outlined here: 90 | 91 | * [SIG-TFX-Addons](https://github.com/tensorflow/tfx-addons) 92 | * [Contributing Guidelines](https://github.com/tensorflow/tfx-addons/blob/main/CONTRIBUTING.md) 93 | * [TensorFlow Code of Conduct](https://github.com/tensorflow/tfx-addons/blob/main/CODE_OF_CONDUCT.md) 94 | -------------------------------------------------------------------------------- /proposals/20220513-pandas_transform.md: -------------------------------------------------------------------------------- 1 | **Your name:** Robert Crowe 2 | 3 | **Your email:** robertcrowe--at--google--dot--com 4 | 5 | **Your company/organization:** Google 6 | 7 | **Project name:** PandasTransform 8 | 9 | ## Project Description 10 | This project will develop a new TFX component which can be used instead of the standard Transform component, and allows developers to work with Pandas dataframes for their feature engineering. Processing will be distributed using Beam for scalability. Operations which require a full pass over the dataset will not be supported in the first release. 11 | 12 | ## Project Category 13 | Component 14 | 15 | ## Project Use-Case(s) 16 | The primary use cases are: 17 | * Developers who are not modeling in TensorFlow 18 | * Developers who are prototyping and are more comfortable working with dataframes, at least initially, and may not deploy their model for inference 19 | * Developers whose feature engineering can work with the basic statistics of the dataset (min, max, etc) and do not need to make full passes over the data 20 | 21 | ## Project Implementation 22 | This will be implemented as a Python-function component, using Beam for processing. Like the Transform component the user will supply a module file with their user code in a `preprocessing_fn`. Their code will be supplied with their dataset as a Pandas dataframe, and they will return their results as a Pandas dataframe. Their code will also be supplied with the basic statistics for their dataset, generated by StatisticsGen, and formatted as a Python dictionary. Their code will also be supplied with the schema of their dataset, generated by SchemaGen, and formatted as a Python dictionary. 
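Concretely, a user-supplied module file for this component might look like the sketch below; the exact `preprocessing_fn` signature and the `fare` feature are illustrative assumptions rather than the final API.

```python
from typing import Any, Dict

import pandas as pd


def preprocessing_fn(dataframe: pd.DataFrame, statistics: Dict[str, Any],
                     schema: Dict[str, Any]) -> pd.DataFrame:
  """Min-max scales `fare` using dataset statistics from StatisticsGen."""
  del schema  # Available for type-aware feature engineering; unused here.
  fare_min = statistics['fare']['min']
  fare_max = statistics['fare']['max']
  dataframe['fare_scaled'] = ((dataframe['fare'] - fare_min) /
                              (fare_max - fare_min))
  return dataframe.drop(columns=['fare'])
```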
23 | 24 | **Caveats:** It's important to note that each invocation of their `preprocessing_fn` will only be supplied with part of their dataset, to enable distributed processing. That means that full passes over their dataset by their user code will not be possible, so operations which require a full pass will not be supported in the first release. A future release may or may not enable full pass operations, TBD. 25 | 26 | **Additional Notes:** It's also important to note that unlike the standard Transform component, this PandasTransform component does not output the 27 | modified schema and statistics for the altered dataset. To generate a schema and statistics which reflect any changes that you've made to your 28 | dataset, you should follow the PandasTransform component with StatisticsGen and SchemaGen components in your pipeline. 29 | 30 | ## Project Dependencies 31 | Apache Beam 32 | PyArrow 33 | Pandas 34 | TensorFlow 35 | TensorFlow Data Validation 36 | TFX 37 | 38 | ## Project Team 39 | Robert Crowe (rcrowe-google) robertcrowe--at--google--dot--com 40 | 41 | # Note 42 | Please be aware of the processes and requirements which are outlined here: 43 | 44 | * [SIG-TFX-Addons](https://github.com/tensorflow/tfx-addons) 45 | * [Contributing Guidelines](https://github.com/tensorflow/tfx-addons/blob/main/CONTRIBUTING.md) 46 | * [TensorFlow Code of Conduct](https://github.com/tensorflow/tfx-addons/blob/main/CODE_OF_CONDUCT.md) 47 | -------------------------------------------------------------------------------- /proposals/20220802-project_pytorch_example.md: -------------------------------------------------------------------------------- 1 | **Your name:** Hannes Hapke 2 | 3 | **Your email:** hannes--at--digits--dot--com 4 | 5 | **Your company/organization:** Digits Financial Inc 6 | 7 | **Project name:** TFX PyTorch Example 8 | 9 | ## Project Description 10 | Adding a TFX pipeline example for PyTorch models to the TFX Addons repository. 11 | 12 | ## Project Category 13 | Example 14 | 15 | ## Project Use-Case(s) 16 | While there are a few non-TF model-based examples for TFX (e.g. JAX or Scikit), there isn't a maintained example for PyTorch models. 17 | 18 | ## Project Implementation 19 | The pipeline example includes the following components: 20 | - Load a known dataset, e.g. 
MNIST, via the CSVExampleGen component 21 | - Run the standard statistics and schema steps via StatisticsGen and SchemaGen 22 | - Perform a pseudo transformation (passthrough of the values) with the new PandasTransform component from tfx-addons 23 | - Add a custom run_fn function for PyTorch for the Trainer component 24 | - Add a TFMA example showing how to analyze PyTorch models to obtain a model blessing 25 | - Push the models to a local path 26 | 27 | ## Project Dependencies 28 | The example will depend on TFX (1.9.1), TFX addons (0.2), Apache Beam, and PyTorch (1.0.2) 29 | 30 | ## Project Team 31 | Hannes Hapke (gh: hanneshapke, email: hannes--at--digits--dot--com) 32 | More contributors are more than welcome 33 | 34 | # Note 35 | Please be aware of the processes and requirements which are outlined here: 36 | 37 | * [SIG-TFX-Addons](https://github.com/tensorflow/tfx-addons) 38 | * [Contributing Guidelines](https://github.com/tensorflow/tfx-addons/blob/main/CONTRIBUTING.md) 39 | * [TensorFlow Code of Conduct](https://github.com/tensorflow/tfx-addons/blob/main/CODE_OF_CONDUCT.md) 40 | -------------------------------------------------------------------------------- /proposals/20230328-airflow_orchestration.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Woosung Song 5 | 6 | **Your email:** wssong@google.com 7 | 8 | **Your company/organization:** Google 9 | 10 | **Project name:** Apache Airflow for Pipeline Orchestration 11 | 12 | ## Project Description 13 | Apache Airflow for pipeline orchestration is going to be migrated from the 14 | official TFX to Addons. 15 | 16 | ## Project Category 17 | Other (Orchestration) 18 | 19 | ## Project Use-Case(s) 20 | In order to simplify core TFX for users who are not using Airflow, we would like 21 | to separate out support for the Airflow orchestrator into a pluggable module and 22 | make it available through TFX-Addons. This will help simplify the core TFX 23 | install, dependencies, and tests, and decrease the size of the installed 24 | payload. 25 | 26 | The functionality of the orchestrator will be retained, but users will need to 27 | update the import paths. To make the transition smoother, it will coexist on 28 | both the official TFX and Addons for a while, and the official one will be 29 | deprecated from the 1.14.0 release. 30 | 31 | ## Project Implementation 32 | The basic implementation and API signatures will follow the original methods, 33 | but the internal dependencies and testing will be reimplemented. 34 | 35 | The import path will be moved from `tfx.orchestration.airflow` to 36 | `tfx_addons.airflow_orchestration`. 37 | 38 | ```python 39 | from tfx_addons.airflow_orchestration import airflow_dag_runner 40 | 41 | def _create_pipeline(): 42 | ... 43 | return [example_gen, statistics_gen, trainer, evaluator, pusher] 44 | 45 | runner = airflow_dag_runner.AirflowDagRunner(_airflow_dag_config) 46 | result = runner.run(_create_pipeline()) 47 | ``` 48 | 49 | ## Project Dependencies 50 | It introduces `apache-airflow[mysql]>=1.10.14,<3` as a dependency. 51 | 52 | ## Project Team 53 | **Project Leader** : Woosung Song, lego0901, wssong@google.com 54 | 1. 
Woosung Song, wssong@google.com, @wssong 55 | -------------------------------------------------------------------------------- /proposals/README.md: -------------------------------------------------------------------------------- 1 | # SIG TFX-Addons Project Proposals 2 | 3 | This directory contains current and past project proposals that either are, 4 | or have been previously, under consideration for approval. 5 | 6 | Projects start as project ideas, which are submitted as 7 | [issues marked with the `Project:Idea` tag](https://github.com/tensorflow/tfx-addons/issues?q=is%3Aissue+is%3Aopen+label%3A%22Project%3A+Idea%22). 8 | They are then discussed by the group, and if a team 9 | of contributors decides to volunteer to implement a project then a project 10 | proposal is written. 11 | 12 | To have your project proposal considered for approval, copy and complete the 13 | [project template](yyyymmdd-project_template.md) and create a pull request to 14 | place it in this directory. -------------------------------------------------------------------------------- /proposals/yyyymmdd-project_template.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | ## Instructions 4 | Copy this template, naming it with the date in `yyyymmdd` format followed by a short name, and fill in the blanks below. 5 | When you're ready for review, place it in `/proposals` and create a pull request. 6 | 7 | --- 8 | 9 | **Your name:** ________________ 10 | 11 | **Your email:** ________________ 12 | 13 | **Your company/organization:** ____________________ 14 | 15 | **Project name:** ____________________ 16 | 17 | ## Project Description 18 | Describe the basics of your project (1-2 sentences). 19 | 20 | ## Project Category 21 | Choose 1: Component | Example | Other 22 | 23 | ## Project Use-Case(s) 24 | Describe at least one use case for your project. Please mention whether your organization will or will not use this project, or if 25 | they already use it. Please also include the potential impact, and any overlap, dependencies, or synergies with other projects. 26 | 27 | ## Project Implementation 28 | Describe at a high-level how you plan to implement your project. If you plan to use containers and/or languages other than Python, 29 | please indicate that. Please also include information about packaging and releasing this project. Please remember that the project 30 | team will be responsible for packaging and releases. 31 | 32 | ## Project Dependencies 33 | Please list any imports and dependencies that you plan to use. Please 34 | highlight any dependencies which are not open-source. 35 | 36 | ## Project Team 37 | List the members of the project team. Include their names, Github user IDs, and email addresses. Identify project leaders. 
38 | 39 | # Note 40 | Please be aware of the processes and requirements which are outlined here: 41 | 42 | * [SIG-TFX-Addons](https://github.com/tensorflow/tfx-addons) 43 | * [Contributing Guidelines](https://github.com/tensorflow/tfx-addons/blob/main/CONTRIBUTING.md) 44 | * [TensorFlow Code of Conduct](https://github.com/tensorflow/tfx-addons/blob/main/CODE_OF_CONDUCT.md) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [tool.pytest.ini_options] 9 | addopts = "--verbose" 10 | python_files = "*_test.py" 11 | norecursedirs = ["env", "proposals"] -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Package Setup script for TFX Addons.""" 16 | import itertools 17 | import os 18 | 19 | from setuptools import find_namespace_packages, setup 20 | 21 | PROJECT_NAME = "tfx-addons" 22 | 23 | 24 | def get_pkg_metadata(): 25 | # Version 26 | context = {} 27 | base_dir = os.path.dirname(os.path.abspath(__file__)) 28 | with open(os.path.join(base_dir, "tfx_addons", "version.py")) as fp: 29 | exec(fp.read(), context) # pylint: disable=exec-used 30 | 31 | return context["_PKG_METADATA"] 32 | 33 | 34 | def get_version(): 35 | # Version 36 | context = {} 37 | base_dir = os.path.dirname(os.path.abspath(__file__)) 38 | with open(os.path.join(base_dir, "tfx_addons", "version.py")) as fp: 39 | exec(fp.read(), context) # pylint: disable=exec-used 40 | 41 | return context["__version__"] 42 | 43 | 44 | def get_ci_constraints(): 45 | # Version 46 | context = {} 47 | base_dir = os.path.dirname(os.path.abspath(__file__)) 48 | with open(os.path.join(base_dir, "tfx_addons", "version.py")) as fp: 49 | exec(fp.read(), context) # pylint: disable=exec-used 50 | 51 | return context["_CI_MIN_CONSTRAINTS"], context["_CI_MAX_CONSTRAINTS"] 52 | 53 | 54 | def get_long_description(): 55 | base_dir = os.path.dirname(os.path.abspath(__file__)) 56 | with open(os.path.join(base_dir, "README.md")) as fp: 57 | return fp.read() 58 | 59 | 60 | TESTS_REQUIRE = ["pytest", "pylint", "pre-commit", "isort", "yapf"] 61 | 62 | PKG_REQUIRES = get_pkg_metadata() 63 | EXTRAS_REQUIRE = PKG_REQUIRES.copy() 64 | EXTRAS_REQUIRE["all"] = list( 65 | set(itertools.chain.from_iterable(list(PKG_REQUIRES.values())))) 66 | EXTRAS_REQUIRE["test"] = TESTS_REQUIRE 67 | CI_MIN_CONSTRAINTS, CI_MAX_CONSTRAINTS = get_ci_constraints() 68 | EXTRAS_REQUIRE["ci_min"] = CI_MIN_CONSTRAINTS 69 | EXTRAS_REQUIRE["ci_max"] = CI_MAX_CONSTRAINTS 70 | 71 | setup( 72 | 
name=PROJECT_NAME, 73 | version=get_version(), 74 | description="TFX Addons libraries", 75 | author="The Tensorflow Authors", 76 | long_description=get_long_description(), 77 | long_description_content_type='text/markdown', 78 | url="https://github.com/tensorflow/tfx-addons", 79 | project_urls={ 80 | # ToDo(gcasassaez): To add docs once we have some docs integrated. 81 | # "Documentation": "", 82 | "Bug Tracker": "https://github.com/tensorflow/tfx-addons/issues", 83 | }, 84 | extras_require=EXTRAS_REQUIRE, 85 | tests_require=TESTS_REQUIRE, 86 | packages=find_namespace_packages(include=[ 87 | # Add here new library package 88 | "tfx_addons", 89 | ] + [f"tfx_addons.{m}.*" 90 | for m in PKG_REQUIRES] + [f"tfx_addons.{m}" for m in PKG_REQUIRES]), 91 | classifiers=[ 92 | "Intended Audience :: Developers", 93 | "Intended Audience :: Education", 94 | "Intended Audience :: Science/Research", 95 | "License :: OSI Approved :: Apache Software License", 96 | "Programming Language :: Python :: 3", 97 | "Programming Language :: Python :: 3.7", 98 | "Programming Language :: Python :: 3.8", 99 | "Programming Language :: Python :: 3.9", 100 | "Topic :: Scientific/Engineering", 101 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 102 | "Topic :: Software Development", 103 | "Topic :: Software Development :: Libraries", 104 | "Topic :: Software Development :: Libraries :: Python Modules", 105 | ], 106 | python_requires=">=3.7", 107 | include_package_data=True, 108 | ) 109 | -------------------------------------------------------------------------------- /tfx_addons/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Init module for TFX.""" 16 | 17 | import importlib as _importlib 18 | 19 | from .version import _PKG_METADATA, __version__ 20 | 21 | _ACTIVE_MODULES = [ 22 | "__version__", 23 | ] + list(_PKG_METADATA.keys()) 24 | 25 | 26 | def __getattr__(name): # pylint: disable=C0103 27 | # PEP-562: Lazy loaded attributes on python modules 28 | # NB(gcasassaez): We lazy load to avoid issues with dependencies not installed 29 | # for some subpackes 30 | if name in _ACTIVE_MODULES: 31 | return _importlib.import_module("." + name, __name__) 32 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 33 | -------------------------------------------------------------------------------- /tfx_addons/apache_airflow/README.md: -------------------------------------------------------------------------------- 1 | # Apache Airflow Orchestrator 2 | 3 | (Please fill in a description of the project, usage instructions, etc.) 
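Until that description lands, usage is expected to mirror the snippet in `proposals/20230328-airflow_orchestration.md`; the DAG settings and the `create_pipeline()` function below are placeholders, not a finalized API.

```python
import datetime

from tfx_addons.airflow_orchestration import airflow_dag_runner

# Placeholder Airflow DAG settings; `create_pipeline()` should return your
# `tfx.dsl.Pipeline`.
airflow_config = {
    'schedule_interval': None,
    'start_date': datetime.datetime(2023, 1, 1),
}

DAG = airflow_dag_runner.AirflowDagRunner(
    airflow_dag_runner.AirflowPipelineConfig(airflow_config)).run(
        create_pipeline())
```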
4 | -------------------------------------------------------------------------------- /tfx_addons/copy_example_gen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/copy_example_gen/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ 16 | Tests for tfx_addons.copy_example_gen.component. 17 | """ 18 | from unittest import mock 19 | 20 | import tensorflow as tf 21 | 22 | from tfx_addons.copy_example_gen import component 23 | 24 | 25 | class TestCopyExampleGen(tf.test.TestCase): 26 | """Test module for CopyExampleGen.""" 27 | def setUp(self): 28 | self.input_json_str = """ 29 | { 30 | "label1": "fakeuri", 31 | "label2": "fakeuri2", 32 | } 33 | """ 34 | 35 | def test_empty_input(self) -> None: 36 | empty_input_json_str = "" 37 | expected_error = ( 38 | "Input string is not provided. Expected format is Split label (key) " 39 | "and Split URI (value).") 40 | 41 | with self.assertRaises(ValueError, msg=expected_error): 42 | # pylint: disable=protected-access 43 | component._create_input_dictionary(input_json_str=empty_input_json_str) 44 | 45 | def test_non_dictionary_input(self) -> None: 46 | non_dictionary_input = "'a', 'b', 'c'" 47 | expected_error = ( 48 | f"Input string {non_dictionary_input} is not provided as a dictionary. " 49 | "Expected format is Split label (key) and Split URI (value).") 50 | 51 | with self.assertRaises(ValueError, msg=expected_error): 52 | # pylint: disable=protected-access 53 | component._create_input_dictionary(input_json_str=non_dictionary_input) 54 | 55 | def test_empty_dictionary(self) -> None: 56 | empty_input_json_str = "{}" 57 | expected_error = ( 58 | "Input dictionary is empty. 
Expected format is Split label (key) " 59 | "and Split URI (value).") 60 | 61 | with self.assertRaises(ValueError, msg=expected_error): 62 | # pylint: disable=protected-access 63 | component._create_input_dictionary(input_json_str=empty_input_json_str) 64 | 65 | def test_valid_input(self) -> None: 66 | with mock.patch('tfx_addons.copy_example_gen.component.fileio'): 67 | # pylint: disable=protected-access 68 | component.CopyExampleGen(input_json_str=self.input_json_str) 69 | 70 | def test_empty_gcs_directory(self) -> None: 71 | with mock.patch( 72 | 'tfx_addons.copy_example_gen.component.fileio') as mock_fileio: 73 | # Returns an empty list indicating no matching files in that location. 74 | mock_fileio.glob.return_value = [] 75 | with self.assertLogs() as warning_msg: 76 | # pylint: disable=protected-access 77 | component._copy_examples(split_tfrecords_uri="mock_uri", 78 | split_value_uri="mock_uri_2") 79 | expected_msg = ( 80 | "WARNING:root:Directory mock_uri does not contain files with .gz " 81 | "suffix.") 82 | self.assertEqual(warning_msg.output, [expected_msg]) 83 | -------------------------------------------------------------------------------- /tfx_addons/example_filter/README.md: -------------------------------------------------------------------------------- 1 | #### SIG TFX-Addons 2 | # Project Proposal 3 | 4 | **Your name:** Ryan Clough 5 | 6 | **Your email:** rclough@spotify.com 7 | 8 | **Your company/organization:** Spotify 9 | 10 | **Project name:** Example Filter 11 | 12 | ## Project Description 13 | Beam based component that can filter Examples based on a user-defined predicate function. 14 | 15 | ## Project Category 16 | Choose 1: Component 17 | 18 | ## Project Use-Case(s) 19 | Data can be imported into TFX in a number of ways, and indeed, sometimes the dataset you wish to load is not under your direct 20 | control. In cases like these, it is useful to have a component that can filter your input data with simple rules. Ex: filter 21 | all records where `feature_a >= 1`. 22 | 23 | Our organization currently has a component for this purpose that is in active use. It is not as robust as it could be. 24 | 25 | It is also worth conidering that we may wish to try and promote this functionality to be included in the TFX core base ExampleGen, 26 | so that the filtering could be done within any ExampleGen based component. 27 | 28 | ## Project Implementation 29 | Spotify can provide the current implementation, which is based off of an old version of Tensorflow Transform. At a high level, use 30 | of the component looks like: 31 | 32 | ```python 33 | def predicate_fn(example) 34 | # Throw out Examples that used a credit card 35 | if b'Credit Card' in example['payment_type']: 36 | return False 37 | return True 38 | ... 39 | 40 | filtered_examples = ExampleFilter( 41 | examples=examples.output, 42 | schema=schema.output, 43 | module_file=filter_module, 44 | ) 45 | ``` 46 | 47 | ## Packaging 48 | 49 | Given that it's a Beam component, I think it will have to be a fully custom component. 50 | 51 | In terms of packaging and providing, we can provide the code, and a sample docker file and example pipeline for the component. 52 | 53 | ## Future Considerations 54 | 55 | For the purposes of this proposal, the `ExampleFilter` component will be submitted as-is, as to not let "perfect" become the 56 | enemy of "good enough". 
There are a number of potential improvements that could be made to the component, but working 57 | through them should be a separate process from this initial proposal to get a working MVP. 58 | 59 | The current implementation is a bit dated and not so robust. It depends on a deprecated TFT proto coder, and only works on 60 | TF Records, as it does not make use of TFXIO. As part of bringing this to TFX-addons, I think it is worth iterating on the 61 | current design. Some initial ideas for change might be: 62 | 63 | * Implementing it more flexibly in TFXIO 64 | * Determine if there's a way to implement it without requiring a schema 65 | * Making the predicate_fn operate on true data types rather than bytes (see example above) 66 | * Adding an input that allows the user to specify splits (currently applies to all splits) 67 | 68 | ## Project Dependencies 69 | Current implementation uses a [proto decoder](https://github.com/tensorflow/transform/blob/v0.24.1/tensorflow_transform/coders/example_proto_coder.py#L329-L339) 70 | deprecated from TFX 0.25 onwards. Otherwise the project uses standard TFX dependencies. 71 | 72 | ## Project Team 73 | * Ryan Clough, rclough@spotify.com, @rclough 74 | * TBD -------------------------------------------------------------------------------- /tfx_addons/example_filter/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (Still in Development) 2 | 3 | ### Last Update: 15 September 2021 4 | 5 | ## Major Features and Improvements 6 | 7 | * None at this time 8 | 9 | ## Breaking Changes 10 | 11 | * None at this time 12 | 13 | ## Deprecations 14 | 15 | * None at this time 16 | 17 | ## Bug Fixes and Other Changes 18 | 19 | * None at this time 20 | 21 | ## Documentation Updates 22 | 23 | * None at this time -------------------------------------------------------------------------------- /tfx_addons/example_filter/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """ 16 | the component for filter addon 17 | """ 18 | 19 | import importlib 20 | import os 21 | 22 | import tensorflow as tf 23 | from tfx.dsl.component.experimental.annotations import OutputDict 24 | from tfx.dsl.io.fileio import listdir 25 | from tfx.types import standard_artifacts 26 | from tfx.v1.dsl.components import InputArtifact, Parameter 27 | from tfx_bsl.coders import example_coder 28 | 29 | 30 | def _get_data_from_tfrecords(train_uri: str): 31 | ''' 32 | Reads and returns data from TFRecords at URI as a list 33 | of dictionaries with values as numpy arrays 34 | Example: 35 | _get_data_from_tfrecords('path_to_TFRecords') 36 | ''' 37 | train_uri = [ 38 | os.path.join(train_uri, file_path) for file_path in listdir(train_uri) 39 | ] 40 | raw_dataset = tf.data.TFRecordDataset(train_uri, compression_type='GZIP') 41 | 42 | np_dataset = [] 43 | for tfrecord in raw_dataset: 44 | serialized_example = tfrecord.numpy() 45 | example = example_coder.ExampleToNumpyDict(serialized_example) 46 | np_dataset.append(example) 47 | 48 | return np_dataset 49 | 50 | 51 | def filter_component(input_data: InputArtifact[standard_artifacts.Examples], 52 | filter_function_str: Parameter[str], 53 | output_file: Parameter[str]) -> OutputDict(list_len=int): 54 | """Filters the data from input data by using the filter function. 55 | 56 | Args: 57 | input_data: Input list of data to be filtered. 58 | output_file: the name of the file to be saved to. 59 | filter_function_str: Module name of the function that will be used to 60 | filter the data. 61 | Example for the function 62 | my_example/my_filter.py: 63 | 64 | # filter module must have filter_function implemented 65 | def filter_function(input_list: Array): 66 | output_list = [] 67 | for element in input_list: 68 | if element.something: 69 | output_list.append(element) 70 | return output_list 71 | 72 | pipeline.py: 73 | filter_component(input_data ,'my_example.my_filter',output_data) 74 | 75 | Returns: 76 | len of the list after the filter 77 | { 78 | 'list_len': len(output_list) 79 | } 80 | 81 | """ 82 | records = _get_data_from_tfrecords(input_data.uri + "/Split-train") 83 | filter_function = importlib.import_module( 84 | filter_function_str).filter_function 85 | filtered_data = filter_function(records) 86 | result_len = len(filtered_data) 87 | new_data = [] 88 | for key in list(filtered_data[0].keys()): 89 | local_list = [] 90 | for i in range(result_len): 91 | local_list.append(str(filtered_data[i][key][0])) 92 | new_data.append(str(local_list)) 93 | writer = tf.io.TFRecordWriter(output_file) 94 | writer.write(tf.data.Dataset.from_tensor_slices(new_data).map(lambda x: x)) 95 | 96 | return {'list_len': result_len} 97 | -------------------------------------------------------------------------------- /tfx_addons/example_filter/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Component test for the filter component.""" 16 | 17 | import os 18 | 19 | import tensorflow as tf 20 | from absl.testing import absltest 21 | from tfx.types import artifact_utils, standard_artifacts 22 | 23 | from tfx_addons.example_filter.component import filter_component 24 | 25 | 26 | class ComponentTest(absltest.TestCase): 27 | def testConstructWithOptions(self): 28 | source_data_dir = os.path.join(os.path.dirname(__file__), 'data') 29 | 30 | examples = standard_artifacts.Examples() 31 | examples.uri = os.path.join(source_data_dir, "example_gen") 32 | examples.split_names = artifact_utils.encode_split_names(['train', 'eval']) 33 | 34 | params = { 35 | "input_data": examples, 36 | "filter_function_str": 'filter_function', 37 | "output_file": 'output', 38 | } 39 | filter_component(**params) 40 | 41 | 42 | if __name__ == '__main__': 43 | tf.test.main() 44 | -------------------------------------------------------------------------------- /tfx_addons/example_filter/data/test_data.csv: -------------------------------------------------------------------------------- 1 | label,col1 2 | ,2 3 | ,2 4 | ,2 5 | ,2 6 | ,2 7 | ,2 8 | ,2 9 | ,2 10 | ,2 11 | ,2 12 | 1,1 13 | 1,1 14 | 1,1 15 | 1,1 16 | 1,1 17 | 1,1 18 | 1,1 19 | 1,1 20 | 1,1 21 | 1,1 22 | 1,1 23 | 1,1 24 | 1,1 25 | 1,1 26 | 1,1 27 | 1,1 28 | 1,1 29 | 1,1 30 | 1,1 31 | 1,1 32 | 1,1 33 | 1,1 34 | 0,0 35 | 0,0 36 | 0,0 37 | 0,0 38 | 0,0 39 | 0,0 40 | 0,0 41 | 0,0 42 | 0,0 43 | 0,0 44 | 0,0 45 | 0,0 46 | 0,0 47 | 0,0 48 | 0,0 49 | 0,0 50 | 0,0 51 | 0,0 -------------------------------------------------------------------------------- /tfx_addons/example_filter/filter_function.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Example function to demonstrate the filter functionality of the module.""" 16 | 17 | 18 | def filter_function(x_list): 19 | """Filters the data from input data by using the filter function. 20 | 21 | Args: 22 | x_list: Input list of data to be filtered. 23 | 24 | 25 | Returns: 26 | filtered list 27 | 28 | """ 29 | new_list = [] 30 | for element in x_list: 31 | if element['label'] == [0]: 32 | new_list.append(element) 33 | return new_list 34 | -------------------------------------------------------------------------------- /tfx_addons/feast_examplegen/README.md: -------------------------------------------------------------------------------- 1 | # FeastExampleGen 2 | 3 | ExampleGen for Feast feature store. 4 | 5 | This component generates a Dataset out of a Feast entity_query and either a list of features or a feature service key. 
6 | 7 | ## Installation 8 | 9 | ```sh 10 | pip install tfx-addons[feast_examplegen] 11 | ``` 12 | 13 | ## Example usage 14 | 15 | ```python 16 | example_gen = FeastExampleGen( 17 | repo_config=RepoConfig(register="gs://..."), 18 | entity_query="SELECT user, timestamp from some_user_dataset", 19 | features=["f1", "f2"], 20 | ) 21 | ``` 22 | Component can be configured the same way as any [QueryBasedExampleGen](https://www.tensorflow.org/tfx/guide/examplegen#query-based_examplegen_customization_experimental). 23 | 24 | Component `outputs` contains: 25 | - `examples`: Channel of type `standard_artifacts.Examples` for output train 26 | and eval examples. 27 | 28 | ## Extra information 29 | 30 | - [Proposal](https://github.com/tensorflow/tfx-addons/blob/main/proposals/20210525-feast_example_gen.md) 31 | - [Example usage](https://github.com/tensorflow/tfx-addons/tree/main/examples/fraud_feast) 32 | -------------------------------------------------------------------------------- /tfx_addons/feast_examplegen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Init module for feast examplegen""" 16 | 17 | from tfx_addons.feast_examplegen.component import FeastExampleGen 18 | -------------------------------------------------------------------------------- /tfx_addons/feast_examplegen/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ 16 | Tests for tfx_addons.feast_examplegen.component. 
17 | """ 18 | 19 | import pytest 20 | 21 | try: 22 | import feast 23 | except ImportError: 24 | pytest.skip("feast not available, skipping", allow_module_level=True) 25 | 26 | from tfx.v1.proto import Input 27 | 28 | from tfx_addons.feast_examplegen.component import FeastExampleGen 29 | 30 | 31 | def test_init_valid(): 32 | entity_query = 'SELECT user FROM fake_db' 33 | repo_config = feast.RepoConfig(provider='local', project='default') 34 | FeastExampleGen(repo_config=repo_config, 35 | features=['feature1', 'feature2'], 36 | entity_query='SELECT user FROM fake_db') 37 | FeastExampleGen(repo_config=repo_config, 38 | features='feature_service1', 39 | entity_query='SELECT user FROM fake_db') 40 | FeastExampleGen(repo_config=repo_config, 41 | features=['feature1', 'feature2'], 42 | input_config=Input(splits=[ 43 | Input.Split(name='train', pattern=entity_query), 44 | Input.Split(name='eval', pattern=entity_query), 45 | ])) 46 | 47 | 48 | def test_input_and_entity(): 49 | entity_query = 'SELECT user FROM fake_db' 50 | repo_config = feast.RepoConfig(provider='local', project='default') 51 | with pytest.raises(RuntimeError): 52 | 53 | FeastExampleGen(repo_config=repo_config, 54 | features=['feature1', 'feature2'], 55 | entity_query=entity_query, 56 | input_config=Input(splits=[ 57 | Input.Split(name='train', pattern=entity_query), 58 | Input.Split(name='eval', pattern=entity_query), 59 | ])) 60 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## Directory Structure 4 | The repo contains three main directories as follows: 5 | - **[Component](./component):** Contains the main component code with a separate file for the executor code 6 | - **[Data](./data):** Containing the sample data to be used for testing 7 | - **[Example](./example):** Contains example codes to test our component with the CSVs present in [data](./data) 8 | 9 | ## A few Git and GitHub practices 10 | 11 | ### Commits 12 | Commits serve as checkpoints during your workflow and can be used to **revert back** in case something gets messed up. 13 | - **When to commit:** Try not to pile up many changes in multiple commits while ensuring that you don't make too many commits for fixing a small issue. 14 | - **Commit messages:** Commit messages should be descriptive enough for an external person to get an idea of what it accomplished while ensuring they don't exceed 50 characters. 15 | 16 | Check out [this](https://gist.github.com/turbo/efb8d57c145e00dc38907f9526b60f17) for more information about the good practices 17 | 18 | ### Branches 19 | Branches are a good way to simulataniously work on different features at the same time. Check out [git-scm](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) to know more about various concepts involved in the same. 20 | 21 | For descriptive branch names, it is a good idea to follow the following format: 22 | **`name/keyword/short-description`** 23 | - **Name:** Name of the person/s working on the branch. This can be ignored if many people(>2) are expected to work on it. 24 | - **Keyword:** This describes what "type" of work this branch is supposed to do. These are typically named as: 25 | - `feature`: Adding/expanding a feature 26 | - `base`: Adding boilerplate/readme/templates etc. 
27 | - `bug`: Fixes a bug
28 | - `junk`: Throwaway branch created to experiment
29 | - **Short description:** As the name suggests, this contains a short description of the branch, usually no longer than 2-3 words separated by a hyphen (`-`).
30 | 
31 | P.S. If multiple branches are being used to work on the same issue (say issue `#n`), they can be named as `name/keyword/#n-short-description`
32 | 
33 | ### Issues
34 | The following points should be considered while creating new issues:
35 | - Use relevant labels like `bug`, `feature` etc.
36 | - If the team has decided who will work on an issue, it should be **assigned** to that person as soon as possible to prevent the same work from being done twice.
37 | - The issue should be linked in the **project** if needed and its status should be kept up to date as the work progresses.
38 | 
39 | ### Pull Requests
40 | It is always a good idea to ensure the following are present in your Pull Request description:
41 | - Relevant issue/s
42 | - What it accomplished
43 | - Mention `[WIP]` in the title and make it a `Draft Pull Request` if it is a work in progress
44 | - Once the pull request is final, it should be **requested for review** from the concerned people
45 | 
--------------------------------------------------------------------------------
/tfx_addons/feature_selection/README.md:
--------------------------------------------------------------------------------
1 | #### SIG TFX-Addons
2 | # Project Proposal
3 | 
4 | **Your name:** Nirzari Gupta
5 | 
6 | **Your email:** nirzu97@gmail.com
7 | 
8 | **Your company/organization:** Outreachy
9 | 
10 | **Project name:** [Feature selection custom component](https://github.com/tensorflow/tfx-addons/issues/7)
11 | 
12 | ## Project Description
13 | This project provides a facility to perform various feature selection algorithms on datasets in TFX pipelines. Additionally, feature scores for selected features will also be generated as a custom artifact.
14 | 
15 | ## Project Category
16 | Component
17 | 
18 | ## Project Use-Case(s)
19 | This project will allow the user to select different algorithms for performing feature selection on dataset artifacts in TFX pipelines.
20 | 
21 | ## Project Implementation
22 | The Feature Selection Custom Component is implemented as a Python function-based component.
23 | 
24 | Implementation of the Feature Selection Custom Component is done using the following approach:
25 | - Get dataset artifact generated by ExampleGen
26 | - Convert it into a format compatible with Scikit-Learn functions (TFRecord to numpy dictionaries)
27 | - Perform univariate feature selection with `SelectorFunc` specified in the module file
28 | - Output the following two artifacts:
29 |   - `updated_data`: Duplicate of the input `Example` artifact, but with updated URI and data values
30 |   - `feature_selection`: Contains data about the feature selection process with the following values available:
31 |     - `scores`: Metric scores from the selector
32 |     - `p_values`: Calculated p-values from the selector
33 |     - `selected_features`: List of selected columns after feature selection
34 | 
35 | ## Module file
36 | #### Structure
37 | The module file is required to have a structure with the following three values:
38 | - `SELECTOR_PARAMS`: Parameters for `SelectorFunc`
39 | - `TARGET_FEATURE`: The target feature in the dataset
40 | - `SelectorFunc`: Univariate function for feature selection
41 | 
42 | #### Example module file
43 | In the below example, we have used sklearn functions directly for simplicity.
You may define custom functions while ensuring that the overall i/o structure is the same. 44 | ``` python 45 | from sklearn.feature_selection import SelectKBest as SelectorFunc 46 | from sklearn.feature_selection import chi2 47 | 48 | SELECTOR_PARAMS = {"score_func": chi2, "k": 2} 49 | TARGET_FEATURE = 'species' 50 | ``` 51 | 52 | ## Example usage 53 | You may use the feature selection component in a way similar to [StatisticsGen](https://www.tensorflow.org/tfx/guide/statsgen) 54 | ``` python 55 | feature_selector = FeatureSelection( 56 | orig_examples = example_gen.outputs['examples'], 57 | module_file='example.modules.iris_module_file' 58 | ) 59 | ``` 60 | 61 | 62 | ## Project Dependencies 63 | The implementation will use the [Scikit-learn feature selection functions](https://scikit-learn.org/stable/modules/feature_selection.html) 64 | 65 | ## Project Team 66 | **Project Leader** : Nirzari Gupta, nirzu97, nirzu97@gmail.com 67 | 1. Fatimah Adwan, FatimahAdwan, akilahafaf72@gmail.com 68 | 2. Kshitijaa Jaglan, deutranium, jaglan.kshitijaa2@gmail.com 69 | 3. Pratishtha Abrol, pratishtha-abrol, pratishthaabrol@gmail.com -------------------------------------------------------------------------------- /tfx_addons/feature_selection/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (v 1.0.0) 2 | 3 | ### Last Update: 3 June 2022 4 | 5 | ## Major Features and Improvements 6 | * Add feature selection functionality through custom or sklearn functions (to be accessed through a module file) 7 | * Compatible with any number of splits in the data 8 | * Returns two different artifacts - `updated_data` for the overall data processed (structured like the input Example artifact) and `feature_selection` for scores, p_values and selected features for reference 9 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/data/data.csv: -------------------------------------------------------------------------------- 1 | pickup_community_area,fare,trip_start_month,trip_start_hour,trip_start_day,trip_start_timestamp,pickup_latitude,pickup_longitude,dropoff_latitude,dropoff_longitude,trip_miles,pickup_census_tract,dropoff_census_tract,payment_type,company,trip_seconds,dropoff_community_area,tips 2 | 60,27.05,10,2,3,1380593700,41.836150155,-87.648787952,,,12.6,,,Cash,Taxi Affiliation Services,1380,,0.0 3 | 10,5.85,10,1,2,1382319000,41.985015101,-87.804532006,,,0.0,,,Cash,Taxi Affiliation Services,180,,0.0 4 | 14,16.65,5,7,5,1369897200,41.968069,-87.721559063,,,0.0,,,Cash,Dispatch Taxi Affiliation,1080,,0.0 5 | 13,16.45,11,12,3,1446554700,41.983636307,-87.723583185,,,6.9,,,Cash,,780,,0.0 6 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/example/modules/iris_module_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Supplement for iris species example with specifics feature modification. 16 | This module file will be used in the feature selection component example. 17 | """ 18 | 19 | from sklearn.feature_selection import \ 20 | SelectKBest as SelectorFunc # pylint: disable=W0611 21 | from sklearn.feature_selection import chi2 22 | 23 | SELECTOR_PARAMS = {"score_func": chi2, "k": 2} 24 | TARGET_FEATURE = 'species' 25 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/example/modules/penguins_module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Supplement for palmer penguins example with specific feature modification. 16 | This module file will be used in the feature selection component example. 
17 | """ 18 | from sklearn.feature_selection import \ 19 | SelectKBest as SelectorFunc # pylint: disable=W0611 20 | from sklearn.feature_selection import chi2 21 | 22 | SELECTOR_PARAMS = {"score_func": chi2, "k": 2} 23 | TARGET_FEATURE = 'species' 24 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/example/modules/pima_indians_module_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Supplement for prima Indians Diabetics example with specifics feature modification. 16 | This module file will be used in the feature selection component example. 17 | """ 18 | from sklearn.feature_selection import \ 19 | SelectKBest as SelectorFunc # pylint: disable=W0611 20 | from sklearn.feature_selection import chi2 21 | 22 | SELECTOR_PARAMS = {"score_func": chi2, "k": 3} 23 | TARGET_FEATURE = 'Outcome' 24 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/nb/Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "408bf10c", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from component import FeatureSelection\n", 11 | "from tfx.components import CsvExampleGen\n", 12 | "from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "95533af7", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "context = InteractiveContext()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "d1e35dbe", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import urllib.request\n", 33 | "import tempfile\n", 34 | "import os\n", 35 | "\n", 36 | "# getting data and setup CsvExampleGen\n", 37 | "DATA_ROOT = tempfile.mkdtemp(prefix='tfx-data') # Create a temporary directory.\n", 38 | "_data_url = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/penguin/data/labelled/penguins_processed.csv'\n", 39 | "_data_filepath = os.path.join(DATA_ROOT, \"data.csv\")\n", 40 | "urllib.request.urlretrieve(_data_url, _data_filepath)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "36c3d298", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "example_gen = CsvExampleGen(input_base=DATA_ROOT)\n", 51 | "context.run(example_gen)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "fa28bcd8", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# give path to the module file\n", 62 | "feature_selector = 
FeatureSelection(orig_examples = example_gen.outputs['examples'],\n", 63 | " module_file=\"module_file\")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "9afcfe7f", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "context.run(feature_selector)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "b088c2c8", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3 (ipykernel)", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.7.10" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 5 106 | } 107 | -------------------------------------------------------------------------------- /tfx_addons/feature_selection/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit_learn==1.1.2 2 | tensorflow 3 | tfx 4 | tfx_bsl==1.9.0 5 | -------------------------------------------------------------------------------- /tfx_addons/firebase_publisher/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (Still in Development) 2 | 3 | ### Last Update: 15 September 2021 4 | 5 | ## Major Features and Improvements 6 | 7 | * None at this time 8 | 9 | ## Breaking Changes 10 | 11 | * None at this time 12 | 13 | ## Deprecations 14 | 15 | * None at this time 16 | 17 | ## Bug Fixes and Other Changes 18 | 19 | * None at this time 20 | 21 | ## Documentation Updates 22 | 23 | * None at this time -------------------------------------------------------------------------------- /tfx_addons/firebase_publisher/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Init module for FirebasePublisher""" 16 | 17 | from tfx_addons.firebase_publisher.component import FirebasePublisher 18 | -------------------------------------------------------------------------------- /tfx_addons/firebase_publisher/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for TFX Firebase Publisher Custom Component.""" 16 | 17 | import tensorflow as tf 18 | from tfx.types import standard_artifacts 19 | 20 | from tfx_addons.firebase_publisher.component import FirebasePublisher 21 | 22 | 23 | class FirebasePublisherTest(tf.test.TestCase): 24 | def testConstruct(self): 25 | firebase_publisher = FirebasePublisher(display_name="test_display_name", 26 | storage_bucket="storage_bucket") 27 | 28 | self.assertEqual(standard_artifacts.PushedModel.TYPE_NAME, 29 | firebase_publisher.outputs['pushed_model'].type_name) 30 | 31 | 32 | if __name__ == '__main__': 33 | tf.test.main() 34 | -------------------------------------------------------------------------------- /tfx_addons/firebase_publisher/executor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Firebase Publisher TFX Component Executor. 16 | 17 | The Firebase Publisher Executor calls the workflow handler 18 | runner.deploy_model_for_firebase_ml(). 19 | """ 20 | 21 | import time 22 | from typing import Any, Dict, List 23 | 24 | from tfx import types 25 | from tfx.components.pusher import executor as tfx_pusher_executor 26 | from tfx.types import artifact_utils, standard_component_specs 27 | 28 | from tfx_addons.firebase_publisher import runner 29 | 30 | _APP_NAME_KEY = "app_name" 31 | _DISPLAY_NAME_KEY = "display_name" 32 | _STORAGE_BUCKET_KEY = "storage_bucket" 33 | _TAGS_KEY = "tags" 34 | _OPTIONS_KEY = "options" 35 | _CREDENTIAL_PATH_KEY = "credential_path" 36 | 37 | 38 | class Executor(tfx_pusher_executor.Executor): 39 | """Pushes a model to Firebase ML.""" 40 | def Do( 41 | self, 42 | input_dict: Dict[str, List[types.Artifact]], 43 | output_dict: Dict[str, List[types.Artifact]], 44 | exec_properties: Dict[str, Any], 45 | ): 46 | """Overrides the tfx_pusher_executor to leverage some of utility methods 47 | 48 | Args: 49 | input_dict: Input dict from input key to a list of artifacts, including: 50 | - model_export: a TFX input channel containing a Model artifact. 51 | - model_blessing: a TFX input channel containing a ModelBlessing 52 | artifact. 53 | output_dict: Output dict from key to a list of artifacts, including: 54 | - pushed_model: a TFX output channel containing a PushedModel artifact. 
55 | It contains information where the model is published at and whether 56 | the model is pushed or not. 57 | exec_properties: An optional dict of execution properties, including: 58 | - display_name: name to identify a hosted model in Firebase ML. 59 | this should be a unique value because it will be used to search 60 | a existing model to update. 61 | - storage_bucket: GCS bucket where the hosted model will be stored. 62 | - app_name: the name of Firebase app to determine the scope. 63 | - tags: tags to be attached to the hosted ML model. 64 | - credential_path: location of GCS or local file system where the 65 | Service Account(SA) Key file is. 66 | - options: additional configurations to be passed to initialize Firebase 67 | app. 68 | 69 | Raises: 70 | RuntimeError: when the size of model exceeds 40mb. 71 | """ 72 | self._log_startup(input_dict, output_dict, exec_properties) 73 | 74 | model_push = artifact_utils.get_single_instance( 75 | output_dict[standard_component_specs.PUSHED_MODEL_KEY]) 76 | if not self.CheckBlessing(input_dict): 77 | self._MarkNotPushed(model_push) 78 | return 79 | model_path = self.GetModelPath(input_dict) 80 | model_version_name = f"v{int(time.time())}" 81 | 82 | pushed_model_path = runner.deploy_model_for_firebase_ml( 83 | app_name=exec_properties.get(_APP_NAME_KEY, '[DEFAULT]'), 84 | display_name=exec_properties.get(_DISPLAY_NAME_KEY), 85 | storage_bucket=exec_properties.get(_STORAGE_BUCKET_KEY), 86 | credential_path=exec_properties.get(_CREDENTIAL_PATH_KEY, None), 87 | tags=exec_properties.get(_TAGS_KEY, []), 88 | options=exec_properties.get(_OPTIONS_KEY, {}), 89 | model_path=model_path, 90 | model_version=model_version_name, 91 | ) 92 | 93 | self._MarkPushed(model_push, pushed_destination=pushed_model_path) 94 | -------------------------------------------------------------------------------- /tfx_addons/firebase_publisher/runner_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for tfx_addons.firebase_publisher.runner.""" 16 | 17 | from unittest import mock 18 | from unittest.mock import Mock 19 | 20 | import tensorflow as tf 21 | 22 | from tfx_addons.firebase_publisher import runner 23 | 24 | 25 | class RunnerTest(tf.test.TestCase): 26 | def testModelExistancy(self): 27 | model_list = Mock() 28 | model_list.models = ['model1'] 29 | self.assertTrue(runner.is_model_present(model_list)) 30 | 31 | model_list.models = [] 32 | self.assertFalse(runner.is_model_present(model_list)) 33 | 34 | @mock.patch('tfx_addons.firebase_publisher.runner.glob.glob') 35 | def testModelPathAndType(self, mock_glob): 36 | tmp_model_path = "/tmp/saved_model" 37 | 38 | mock_glob.return_value = [f"{tmp_model_path}/model.tflite"] 39 | is_tflite, model_path = runner.get_model_path_and_type(tmp_model_path) 40 | self.assertTrue(is_tflite) 41 | self.assertEquals(f"{tmp_model_path}/model.tflite", model_path) 42 | 43 | mock_glob.return_value = [] 44 | is_tflite, model_path = runner.get_model_path_and_type(tmp_model_path) 45 | self.assertFalse(is_tflite) 46 | self.assertEquals(tmp_model_path, model_path) 47 | 48 | @mock.patch('tfx_addons.firebase_publisher.runner.fileio') 49 | @mock.patch('tfx_addons.firebase_publisher.runner.tf.io.gfile.GFile') 50 | def testCheckModelSize(self, mock_gfile, mock_fileio): 51 | mock_source = Mock() 52 | mock_source.as_dict.get.return_value = "mock_return" 53 | 54 | mock_gfile().__enter__.return_value.size.return_value = 83886080 55 | mock_gfile().__exit__ = Mock(return_value=False) 56 | 57 | try: 58 | runner.check_model_size(mock_source) 59 | except RuntimeError: 60 | self.fail("Runtime error occured unexpectedly") 61 | 62 | mock_fileio.remove() 63 | mock_gfile().__enter__.return_value.size.return_value = 83886081 64 | mock_gfile().__exit__ = Mock(return_value=False) 65 | with self.assertRaises(RuntimeError): 66 | runner.check_model_size(mock_source) 67 | 68 | 69 | if __name__ == "__main__": 70 | tf.test.main() 71 | -------------------------------------------------------------------------------- /tfx_addons/huggingface_pusher/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/huggingface_pusher/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for TFX HuggingFace Pusher Custom Component.""" 16 | 17 | import tensorflow as tf 18 | from tfx.types import channel_utils, standard_artifacts 19 | 20 | from tfx_addons.huggingface_pusher.component import HFPusher 21 | 22 | 23 | class HFPusherTest(tf.test.TestCase): 24 | def testConstruct(self): 25 | test_model = channel_utils.as_channel([standard_artifacts.Model()]) 26 | hf_pusher = HFPusher( 27 | username="test_username", 28 | access_token="test_access_token", 29 | repo_name="test_repo_name", 30 | model=test_model, 31 | space_config={ 32 | "repo_name": "test_repo_name", # default: same as model repo_name 33 | "app_path": "app.gradio", # or app/gradio 34 | "space_sdk": "gradio", # or streamlit, default: gradio 35 | "placeholders": { 36 | # look for $MODEL_REPO_ID, $MODEL_REPO_URL, $MODEL_VERSION 37 | # tokens in files to replace with appropriate values at runtime 38 | "MODEL_REPO_ID": "$MODEL_REPO_ID", 39 | "MODEL_REPO_URL": "$MODEL_REPO_URL", 40 | "MODEL_VERSION": "$MODEL_VERSION", 41 | } 42 | }) 43 | 44 | self.assertEqual( 45 | standard_artifacts.PushedModel.TYPE_NAME, 46 | hf_pusher.outputs["pushed_model"].type_name, 47 | ) 48 | 49 | 50 | if __name__ == "__main__": 51 | tf.test.main() 52 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (Still in Development) 2 | ### Last Update: 19 March 2022 3 | 4 | 22 | 23 | # Version 1.0.0 (Initial release) 24 | 25 | ## Major Features and Improvements 26 | 27 | * Initial component release. The exit handler component supports two messaging providers: 28 | * Slack 29 | * Logging 30 | * Contributions to support more messaging providers (e.g. Twilio, Microsoft Teams) are welcomed and encouraged. 31 | * Initial release only supports Google Cloud Vertex deployments. 32 | * Initial documentation 33 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ Message Exit Handler component """ 16 | 17 | import json 18 | 19 | from absl import logging 20 | from kfp.pipeline_spec import pipeline_spec_pb2 21 | from tfx import v1 as tfx 22 | from tfx.utils import proto_utils 23 | 24 | from tfx_addons.message_exit_handler import constants 25 | from tfx_addons.message_exit_handler.message_providers.base_provider import \ 26 | MessagingType 27 | from tfx_addons.message_exit_handler.message_providers.logging_provider import \ 28 | LoggingMessageProvider 29 | from tfx_addons.message_exit_handler.message_providers.slack_provider import \ 30 | SlackMessageProvider 31 | 32 | 33 | @tfx.orchestration.experimental.exit_handler 34 | def MessageExitHandler( 35 | final_status: tfx.dsl.components.Parameter[str], 36 | on_failure_only: tfx.dsl.components.Parameter[bool] = False, 37 | message_type: tfx.dsl.components.Parameter[str] = MessagingType.LOGGING. 38 | value, 39 | slack_credentials: tfx.dsl.components.Parameter[str] = None, 40 | decrypt_fn: tfx.dsl.components.Parameter[str] = None, 41 | ): 42 | """ 43 | Exit handler component for TFX pipelines originally developed by 44 | Digits Financial, Inc. 45 | The handler notifies the user of the final pipeline status via Slack. 46 | 47 | Args: 48 | final_status: The final status of the pipeline. 49 | slack_credentials: (Optional) The credentials to use for the 50 | Slack API calls, json format. 51 | on_failure_only: (Optional) Whether to notify only on failure. 52 | False is the default. 53 | message_type: (Optional) The type of message to send. 54 | Logging is the default. 
55 | decrypt_fn: (Optional) The function to use to decrypt the credentials, 56 | 'tfx_addons.message_exit_handler.component_tests.fake_decryption_fn' 57 | 58 | """ 59 | 60 | # parse the final status 61 | pipeline_task_status = pipeline_spec_pb2.PipelineTaskFinalStatus() 62 | proto_utils.json_to_proto(final_status, pipeline_task_status) 63 | logging.debug(f"MessageExitHandler: {final_status}") 64 | status = json.loads(final_status) 65 | 66 | # leave the exit handler if pipeline succeeded and on_failure_only is True 67 | if on_failure_only and status["state"] == constants.SUCCESS_STATUS: 68 | logging.info("MessageExitHandler: Skipping notification on success.") 69 | return 70 | 71 | # create the message provider 72 | if message_type == MessagingType.SLACK.value: 73 | # parse slack credentials 74 | if not slack_credentials: 75 | raise ValueError("Slack credentials not provided.") 76 | provider = SlackMessageProvider(status=status, 77 | credentials=slack_credentials, 78 | decrypt_fn=decrypt_fn) 79 | elif message_type == MessagingType.LOGGING.value: 80 | provider = LoggingMessageProvider(status=status) 81 | else: 82 | raise ValueError( 83 | f"MessageExitHandler: Unknown message type: {message_type}") 84 | 85 | provider.send_message() 86 | message = provider.get_message() 87 | logging.info(f"MessageExitHandler: {message}") 88 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ Constants for the Message Exit Handler """ 16 | 17 | SUCCESS_STATUS = "SUCCEEDED" 18 | FAILURE_STATUS = "FAILED" 19 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/base_provider.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Message Providers supported by the Message Exit Handler component. 16 | 17 | Currently supported: 18 | * Logging 19 | * Slack 20 | 21 | """ 22 | 23 | import enum 24 | from typing import Dict, Text 25 | 26 | from tfx_addons.message_exit_handler import constants 27 | 28 | 29 | class MessagingType(enum.Enum): 30 | """Determines the type of message to send.""" 31 | 32 | LOGGING = "logging" 33 | SLACK = "slack" 34 | 35 | 36 | class BaseProvider: 37 | """Message provider interface.""" 38 | def __init__(self, status: Dict) -> None: 39 | self._status = status 40 | self._message = self.set_message(status) 41 | 42 | @staticmethod 43 | def set_message(status) -> str: 44 | """Set the message to be sent.""" 45 | # parse the Vertex paths 46 | # structure: projects/{project}/locations/{location}/pipelineJobs/{pipeline_job} 47 | elements = status["pipelineJobResourceName"].split("/") 48 | project = elements[1] 49 | location = elements[3] 50 | job_id = elements[-1] 51 | 52 | # Generate message 53 | if status["state"] == constants.SUCCESS_STATUS: 54 | message = ( 55 | ":tada: " 56 | f"Pipeline job *{job_id}* ({project}) completed successfully.\n") 57 | else: 58 | message = f":scream: Pipeline job *{job_id}* ({project}) failed." 59 | message += f"\n>{status['error']['message']}" 60 | 61 | message += f"\nhttps://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{job_id}" 62 | return message 63 | 64 | def get_message(self) -> Text: 65 | """Get the message to be sent.""" 66 | return self._message 67 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/base_provider_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for Base Provider functions.""" 16 | 17 | import tensorflow as tf 18 | 19 | from tfx_addons.message_exit_handler import constants 20 | from tfx_addons.message_exit_handler.message_providers import base_provider 21 | 22 | SUCCESS_MESSAGE = """:tada: Pipeline job *test-pipeline-job* (test-project) completed successfully. 23 | 24 | https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/test-pipeline-job""" 25 | 26 | FAILURE_MESSAGE = """:scream: Pipeline job *test-pipeline-job* (test-project) failed. 27 | >test error 28 | https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/test-pipeline-job""" 29 | 30 | 31 | class MessageProviderTest(tf.test.TestCase): 32 | @staticmethod 33 | def get_final_status(state: str = constants.SUCCESS_STATUS, 34 | error: str = "") -> str: 35 | """Assemble final status for tests""" 36 | status = { 37 | "state": 38 | state, 39 | "error": 40 | error, 41 | "pipelineJobResourceName": 42 | ("projects/test-project/locations/" 43 | "us-central1/pipelineJobs/test-pipeline-job"), 44 | } 45 | if error: 46 | status.update({"error": {"message": error}}) 47 | return status 48 | 49 | def test_message_provider_success(self): 50 | final_status = self.get_final_status() 51 | test_provider = base_provider.BaseProvider(final_status) 52 | self.assertEqual(SUCCESS_MESSAGE, test_provider.get_message()) 53 | 54 | def test_message_provider_failure(self): 55 | final_status = self.get_final_status(state=constants.FAILURE_STATUS, 56 | error="test error") 57 | test_provider = base_provider.BaseProvider(final_status) 58 | self.assertEqual(FAILURE_MESSAGE, test_provider.get_message()) 59 | 60 | 61 | if __name__ == "__main__": 62 | tf.test.main() 63 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/logging_provider.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ Message provider interface for logging messages. 
""" 16 | 17 | from typing import Dict, Optional 18 | 19 | from absl import logging 20 | 21 | from tfx_addons.message_exit_handler.message_providers.base_provider import \ 22 | BaseProvider 23 | 24 | 25 | class LoggingMessageProvider(BaseProvider): 26 | """Logging message provider.""" 27 | def __init__( 28 | self, 29 | status: Dict, 30 | log_level: Optional[int] = logging.INFO, 31 | ) -> None: 32 | super().__init__(status=status) 33 | self._log_level = log_level 34 | 35 | def send_message(self) -> None: 36 | logging.log(self._log_level, f"MessageExitHandler: {self._message}") 37 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/logging_provider_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for Logging Provider functions.""" 16 | 17 | import tensorflow as tf 18 | 19 | from tfx_addons.message_exit_handler import constants 20 | from tfx_addons.message_exit_handler.message_providers import logging_provider 21 | 22 | SUCCESS_MESSAGE = """:tada: Pipeline job *test-pipeline-job* (test-project) completed successfully. 23 | 24 | https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/test-pipeline-job""" 25 | 26 | FAILURE_MESSAGE = """:scream: Pipeline job *test-pipeline-job* (test-project) failed. 
27 | >test error 28 | https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/test-pipeline-job""" 29 | 30 | 31 | class LoggingProviderTest(tf.test.TestCase): 32 | @staticmethod 33 | def get_final_status(state: str = constants.SUCCESS_STATUS, 34 | error: str = "") -> str: 35 | """Assemble final status for tests""" 36 | status = { 37 | "state": 38 | state, 39 | "error": 40 | error, 41 | "pipelineJobResourceName": 42 | ("projects/test-project/locations/" 43 | "us-central1/pipelineJobs/test-pipeline-job"), 44 | } 45 | if error: 46 | status.update({"error": {"message": error}}) 47 | return status 48 | 49 | def test_logging_message_provider_success(self): 50 | final_status = self.get_final_status() 51 | with self.assertLogs(level="INFO") as logs: 52 | message_provider = logging_provider.LoggingMessageProvider(final_status) 53 | message_provider.send_message() 54 | self.assertLen(logs.output, 1) 55 | self.assertEqual( 56 | "INFO:absl:MessageExitHandler: " + SUCCESS_MESSAGE, 57 | logs.output[0], 58 | ) 59 | 60 | def test_logging_message_provider_failure(self): 61 | final_status = self.get_final_status(state=constants.FAILURE_STATUS, 62 | error="test error") 63 | with self.assertLogs(level="INFO") as logs: 64 | message_provider = logging_provider.LoggingMessageProvider(final_status) 65 | message_provider.send_message() 66 | self.assertLen(logs.output, 1) 67 | self.assertEqual( 68 | "INFO:absl:MessageExitHandler: " + FAILURE_MESSAGE, 69 | logs.output[0], 70 | ) 71 | 72 | 73 | if __name__ == "__main__": 74 | tf.test.main() 75 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/slack_provider.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ Message provider interface for slack messages. 
""" 16 | 17 | from typing import Dict, Optional 18 | 19 | from absl import logging 20 | from pydantic import BaseModel 21 | from slack import WebClient 22 | from slack.errors import SlackApiError 23 | from tfx.utils import import_utils 24 | 25 | from tfx_addons.message_exit_handler.message_providers.base_provider import \ 26 | BaseProvider 27 | 28 | 29 | class SlackCredentials(BaseModel): 30 | """Pydantic class to de/serialize the slack credentials.""" 31 | slack_token: str 32 | slack_channel_id: str 33 | 34 | 35 | class SlackMessageProvider(BaseProvider): 36 | """Slack message provider.""" 37 | def __init__(self, 38 | status: Dict, 39 | credentials: str, 40 | decrypt_fn: Optional[str] = None) -> None: 41 | super().__init__(status=status) 42 | 43 | if not credentials: 44 | raise ValueError("Slack credentials not provided.") 45 | 46 | credentials = SlackCredentials.parse_raw(credentials) 47 | self._slack_channel_id = credentials.slack_channel_id 48 | self._slack_token = credentials.slack_token 49 | 50 | if decrypt_fn: 51 | module_path, fn_name = decrypt_fn.rsplit(".", 1) 52 | logging.info( 53 | f"MessageExitHandler: Importing {fn_name} from {module_path} " 54 | "to decrypt credentials.") 55 | fn = import_utils.import_func_from_module(module_path, fn_name) 56 | self._slack_channel_id = fn(self._slack_channel_id) 57 | self._slack_token = fn(self._slack_token) 58 | 59 | self._client = WebClient(token=self._slack_token) 60 | 61 | def send_message(self) -> None: 62 | try: 63 | response = self._client.chat_postMessage(channel=self._slack_channel_id, 64 | text=self._message) 65 | logging.info(f"MessageExitHandler: Slack response: {response}") 66 | except SlackApiError as e: 67 | logging.error( 68 | f"MessageExitHandler: Slack API error: {e.response['error']}") 69 | -------------------------------------------------------------------------------- /tfx_addons/message_exit_handler/message_providers/slack_provider_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for Slack Provider functions.""" 16 | 17 | from unittest.mock import patch 18 | 19 | import tensorflow as tf 20 | 21 | from tfx_addons.message_exit_handler import constants 22 | from tfx_addons.message_exit_handler.message_providers import slack_provider 23 | 24 | 25 | class SlackProviderTest(tf.test.TestCase): 26 | @staticmethod 27 | def get_final_status(state: str = constants.SUCCESS_STATUS, 28 | error: str = "") -> str: 29 | """Assemble final status for tests""" 30 | status = { 31 | "state": 32 | state, 33 | "error": 34 | error, 35 | "pipelineJobResourceName": 36 | ("projects/test-project/locations/" 37 | "us-central1/pipelineJobs/test-pipeline-job"), 38 | } 39 | if error: 40 | status.update({"error": {"message": error}}) 41 | return status 42 | 43 | @patch( 44 | 'tfx_addons.message_exit_handler.message_providers.slack_provider.WebClient' 45 | ) 46 | def test_slack_message_provider(self, web_client_mock): 47 | final_status = self.get_final_status() 48 | credentials = slack_provider.SlackCredentials( 49 | slack_token="test-token", slack_channel_id="test-channel").json() 50 | 51 | message_provider = slack_provider.SlackMessageProvider( 52 | final_status, credentials) 53 | message_provider.send_message() 54 | web_client_mock.assert_called_once() 55 | web_client_mock.assert_called_with(token='test-token') 56 | 57 | @patch( 58 | 'tfx_addons.message_exit_handler.message_providers.slack_provider.WebClient' 59 | ) 60 | def test_slack_message_provider_with_decrypt_fn(self, mock_web_client): 61 | final_status = self.get_final_status() 62 | credentials = slack_provider.SlackCredentials( 63 | slack_token="test-token", slack_channel_id="test-channel").json() 64 | 65 | message_provider = slack_provider.SlackMessageProvider( 66 | final_status, 67 | credentials, 68 | decrypt_fn= 69 | 'tfx_addons.message_exit_handler.component_test.fake_decryption_fn') 70 | message_provider.send_message() 71 | mock_web_client.assert_called_once() 72 | mock_web_client.assert_called_with(token='TEST-TOKEN') 73 | 74 | 75 | if __name__ == "__main__": 76 | tf.test.main() 77 | -------------------------------------------------------------------------------- /tfx_addons/mlmd_client/README.md: -------------------------------------------------------------------------------- 1 | # TFX MLMD Client Library 2 | 3 | ## Project Description 4 | 5 | Client library to inspect content in ML Metadata populated by TFX pipelines. Library will be written in Python and distributed through PyPi. 6 | Given metadata connection information, it should provide easy to use methods to introspect the Metadata DB. 
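A minimal sketch of the intended usage, based on `client_test.py` in this package (it assumes `pipeline` is a TFX `Pipeline` that has already been run with a metadata connection configured):

```python
from tfx_addons.mlmd_client import MetadataClient

# `pipeline` is a tfx.orchestration.pipeline.Pipeline that has already run,
# so its metadata store contains the recorded artifacts.
pipeline_context = MetadataClient.from_pipeline(pipeline)

# Introspect artifacts recorded for this pipeline, e.g. by artifact type name.
string_artifacts = pipeline_context.get_artifact_by_type_name("String")
```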
7 | 8 | Idea from [#12](https://github.com/tensorflow/tfx-addons/issues/12) 9 | 10 | **Status**: Paused 11 | -------------------------------------------------------------------------------- /tfx_addons/mlmd_client/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (Still in Development) 2 | 3 | ### Last Update: 15 September 2021 4 | 5 | ## Major Features and Improvements 6 | 7 | * None at this time 8 | 9 | ## Breaking Changes 10 | 11 | * None at this time 12 | 13 | ## Deprecations 14 | 15 | * None at this time 16 | 17 | ## Bug Fixes and Other Changes 18 | 19 | * None at this time 20 | 21 | ## Documentation Updates 22 | 23 | * None at this time -------------------------------------------------------------------------------- /tfx_addons/mlmd_client/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """MLMDClient module""" 16 | __all__ = ["MetadataClient"] 17 | from .client import MetadataClient 18 | -------------------------------------------------------------------------------- /tfx_addons/mlmd_client/client_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for tfx_addons.mlmd_client.client.""" 16 | import os 17 | 18 | from ml_metadata.proto import metadata_store_pb2 19 | from tfx.dsl.component.experimental.annotations import (OutputArtifact, 20 | Parameter) 21 | from tfx.dsl.component.experimental.decorators import component 22 | from tfx.orchestration.local.local_dag_runner import LocalDagRunner 23 | from tfx.orchestration.pipeline import Pipeline 24 | from tfx.types.standard_artifacts import String 25 | 26 | from tfx_addons.mlmd_client import client 27 | 28 | 29 | @component 30 | def print_component(word: Parameter[str], word_out: OutputArtifact[String]): 31 | print(word) 32 | word_out.value = word 33 | 34 | 35 | def _create_pipeline(root_dir: str): 36 | comp = print_component(word="test") 37 | connection_config = metadata_store_pb2.ConnectionConfig() 38 | connection_config.sqlite.filename_uri = os.path.join(root_dir, "db.sqlite") 39 | connection_config.sqlite.connection_mode = 3 # READWRITE_OPENCREATE 40 | return Pipeline( 41 | pipeline_root=root_dir, 42 | pipeline_name="client_test", 43 | metadata_connection_config=connection_config, 44 | components=[comp], 45 | ) 46 | 47 | 48 | def test_pipeline_exists(tmpdir): 49 | pipeline = _create_pipeline(tmpdir.mkdir("test").strpath) 50 | LocalDagRunner().run(pipeline) 51 | p = client.MetadataClient.from_pipeline(pipeline) 52 | assert isinstance(p, client.PipelineContext) 53 | 54 | 55 | def test_get_artifacts(tmpdir): 56 | pipeline = _create_pipeline(tmpdir.mkdir("test").strpath) 57 | LocalDagRunner().run(pipeline) 58 | p = client.MetadataClient.from_pipeline(pipeline) 59 | assert isinstance(p, client.PipelineContext) 60 | assert len(p.get_artifact_by_type_name('String')) == 1 61 | -------------------------------------------------------------------------------- /tfx_addons/model_card_generator/README.md: -------------------------------------------------------------------------------- 1 | # TFX Model Card Generator 2 | 3 | Idea from [#82](https://github.com/tensorflow/tfx-addons/issues/82) 4 | 5 | **Status**: Active 6 | 7 | Created by @shuklak13 8 | 9 | The ModelCardGenerator TFX pipeline component generates model cards. 10 | 11 | For the detailed model card format, see the 12 | [Model Card API](https://www.tensorflow.org/responsible_ai/model_card_toolkit/api_docs/python/model_card_toolkit/ModelCard). 13 | 14 | For more general information about TFX, please see the 15 | [TFX User Guide](https://www.tensorflow.org/tfx/guide). 16 | 17 | ## Configuring the ModelCardGenerator Component 18 | 19 | The ModelCardGenerator takes 20 | [dataset statistics](https://www.tensorflow.org/tfx/guide/statsgen), 21 | [model evaluation](https://www.tensorflow.org/tfx/guide/evaluator), and a 22 | [pushed model](https://www.tensorflow.org/tfx/guide/pusher) to automatically 23 | populate parts of a model card. 24 | 25 | [Model card fields](https://www.tensorflow.org/responsible_ai/model_card_toolkit/api_docs/python/model_card_toolkit/ModelCard) 26 | can also be explicitly populated with a JSON string (this can be generated using 27 | the [`json`](https://docs.python.org/3/library/json.html) module, see Example 28 | below). If a field is populated both by TFX and JSON, the JSON value will 29 | overwrite the TFX value. 30 | 31 | The ModelCardGenerator writes model card documents to the `model_card/` 32 | directory of its artifact output.
It uses a default HTML model card template, 33 | which is used to generate `model_card.html`. Custom 34 | [templates](https://www.tensorflow.org/responsible_ai/model_card_toolkit/guide/templates) 35 | can also be used; each template input must be accompanied by a file name output 36 | in the `template_io` arg. 37 | 38 | ### Example 39 | 40 | ```py 41 | import json 42 | 43 | from tfx_addons.model_card_generator.component import ModelCardGenerator 44 | 45 | ... 46 | model_card_fields = { 47 | 'model_details': { 48 | 'name': 'my_model', 49 | 'owners': 'Google', 50 | 'version': 'v0.1' 51 | }, 52 | 'considerations': { 53 | 'limitations': 'This is a demo model.' 54 | } 55 | } 56 | mc_gen = ModelCardGenerator( 57 | statistics=statistics_gen.outputs['statistics'], 58 | evaluation=evaluator.outputs['evaluation'], 59 | pushed_model=pusher.outputs['pushed_model'], 60 | json=json.dumps(model_card_fields), 61 | template_io=[ 62 | ('html/default_template.html.jinja', 'model_card.html'), 63 | ('md/default_template.md.jinja', 'model_card.md') 64 | ] 65 | ) 66 | ``` 67 | 68 | More details are available in the 69 | [ModelCardGenerator](https://www.tensorflow.org/responsible_ai/model_card_toolkit/api_docs/python/model_card_toolkit/ModelCardGenerator) 70 | API reference. 71 | 72 | See our 73 | [end-to-end demo](https://www.tensorflow.org/responsible_ai/model_card_toolkit/examples/MLMD_Model_Card_Toolkit_Demo) 74 | for a full working example. 75 | -------------------------------------------------------------------------------- /tfx_addons/model_card_generator/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version 2 | 3 | ### Last Update: 3 April 2023 4 | 5 | ## Major Features and Improvements 6 | 7 | * Model Card Generator for TFX pipelines using the `model-card-toolkit` 8 | 9 | ## Breaking Changes 10 | 11 | * None at this time 12 | 13 | ## Deprecations 14 | 15 | * None at this time 16 | 17 | ## Bug Fixes and Other Changes 18 | 19 | * None at this time 20 | 21 | ## Documentation Updates 22 | 23 | * None at this time 24 | -------------------------------------------------------------------------------- /tfx_addons/model_card_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Model Card Generator""" 16 | -------------------------------------------------------------------------------- /tfx_addons/model_card_generator/artifact.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """The ModelCard TFX/MLMD artifact.""" 16 | 17 | import datetime 18 | 19 | import ml_metadata as mlmd 20 | from absl import logging 21 | from ml_metadata import errors 22 | from ml_metadata.proto import metadata_store_pb2 23 | from tfx.types.artifact import Artifact 24 | from tfx.types.system_artifacts import Metrics 25 | 26 | 27 | class ModelCard(Artifact): 28 | """A [TFX/MLMD artifact](https://www.tensorflow.org/tfx/guide/mlmd#data_model) to model card assets. 29 | 30 | Assets include: 31 | * a data file containing the model card fields, located at 32 | `/data/model_card.proto`. 33 | * the model card itself, located at the `/model_card/ directory`. 34 | """ 35 | TYPE_NAME = 'ModelCard' 36 | TYPE_ANNOTATION = Metrics 37 | 38 | 39 | def create_and_save_artifact( 40 | artifact_name: str, artifact_uri: str, 41 | store: mlmd.MetadataStore) -> metadata_store_pb2.Artifact: 42 | """Generates and saves a ModelCard artifact to the specified MetadataStore. 43 | 44 | Args: 45 | artifact_name: The name for the ModelCard artifact. A timestamp will be 46 | appended to this to distinguish model cards created from the same job. 47 | artifact_uri: The uri for the ModelCard artifact. 48 | store: The MetadataStore where the ModelCard artifact and artifact type are 49 | saved. 50 | 51 | Returns: 52 | The saved artifact, which can be used to store model card assets. 53 | """ 54 | 55 | try: 56 | type_id = store.get_artifact_type(ModelCard.TYPE_NAME).id 57 | except errors.NotFoundError: 58 | type_id = store.put_artifact_type( 59 | metadata_store_pb2.ArtifactType(name=ModelCard.TYPE_NAME)) 60 | name = ''.join( 61 | [artifact_name, '_', 62 | datetime.datetime.now().strftime('%H:%M:%S')]) 63 | 64 | # Save artifact to store. Also populates the artifact's id. 65 | artifact_id = store.put_artifacts([ 66 | metadata_store_pb2.Artifact(type=ModelCard.TYPE_NAME, 67 | type_id=type_id, 68 | uri=artifact_uri, 69 | name=name) 70 | ])[0] 71 | artifact = store.get_artifacts_by_id([artifact_id])[0] 72 | logging.info( 73 | 'Successfully saved ModelCard artifact %s with uri=%s and id=%s.', 74 | artifact.name, artifact.uri, artifact.id) 75 | return artifact 76 | -------------------------------------------------------------------------------- /tfx_addons/model_card_generator/artifact_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for artifact.""" 16 | 17 | import ml_metadata as mlmd 18 | from absl.testing import absltest 19 | from ml_metadata.proto import metadata_store_pb2 20 | 21 | from tfx_addons.model_card_generator import artifact 22 | 23 | 24 | class ArtifactTest(absltest.TestCase): 25 | def setUp(self): 26 | super(ArtifactTest, self).setUp() 27 | connection_config = metadata_store_pb2.ConnectionConfig() 28 | connection_config.fake_database.SetInParent() 29 | self.store = mlmd.MetadataStore(connection_config) 30 | 31 | def test_create_and_save_artifact(self): 32 | mc_artifact = artifact.create_and_save_artifact( 33 | artifact_name='my model', 34 | artifact_uri='/path/to/model/card/assets', 35 | store=self.store) 36 | 37 | with self.subTest('saved_to_mlmd'): 38 | self.assertCountEqual([mc_artifact], 39 | self.store.get_artifacts_by_id([mc_artifact.id])) 40 | with self.subTest('properties'): 41 | with self.subTest('type_id'): 42 | self.assertEqual(mc_artifact.type_id, 43 | self.store.get_artifact_type('ModelCard').id) 44 | with self.subTest('uri'): 45 | self.assertEqual(mc_artifact.uri, '/path/to/model/card/assets') 46 | with self.subTest('name'): 47 | self.assertStartsWith(mc_artifact.name, 'my model_') 48 | 49 | 50 | if __name__ == '__main__': 51 | absltest.main() 52 | -------------------------------------------------------------------------------- /tfx_addons/pandas_transform/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version - 1.0.0 2 | 3 | ### Last Update: 12 August 2022 4 | 5 | ## Major Features and Improvements 6 | 7 | * Implements core functionality using native Pandas and Apache Beam 8 | * Does not implement a full pass over the dataset to gather statistics. Statistics are captured by StatisticsGen. 9 | 10 | ## Breaking Changes 11 | 12 | * None at this time 13 | 14 | ## Deprecations 15 | 16 | * None at this time 17 | 18 | ## Bug Fixes and Other Changes 19 | 20 | * None at this time 21 | 22 | ## Documentation Updates 23 | 24 | * None at this time -------------------------------------------------------------------------------- /tfx_addons/pandas_transform/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Init module for PandasTransform""" 16 | 17 | from tfx_addons.pandas_transform.component import PandasTransform 18 | 19 | __version__ = '1.0.0' 20 | -------------------------------------------------------------------------------- /tfx_addons/pandas_transform/null_preprocessing.py: -------------------------------------------------------------------------------- 1 | """ Null preprocessing, for minimal testing """ 2 | from absl import logging 3 | 4 | 5 | # pylint: disable=unused-argument 6 | def preprocessing_fn(df, schema, statistics): 7 | logging.info('Running null preprocessing') 8 | return df 9 | -------------------------------------------------------------------------------- /tfx_addons/predictions_to_bigquery/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/predictions_to_bigquery/test_component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # This code was originally written by Hannes Hapke (Digits Financial Inc.) 16 | # on Feb. 6, 2023. 17 | """ 18 | Tests around Digits Prediction-to-BigQuery component. 19 | """ 20 | 21 | import tensorflow as tf 22 | from tfx.types import channel_utils, standard_artifacts 23 | 24 | from . 
import component 25 | 26 | 27 | class ComponentTest(tf.test.TestCase): 28 | def setUp(self): 29 | super(ComponentTest, self).setUp() 30 | self._transform_graph = channel_utils.as_channel( 31 | [standard_artifacts.TransformGraph()]) 32 | self._inference_results = channel_utils.as_channel( 33 | [standard_artifacts.InferenceResult()]) 34 | self._schema = channel_utils.as_channel([standard_artifacts.Schema()]) 35 | 36 | def testConstruct(self): 37 | # not a real test, just checking if the component can be 38 | # instantiated 39 | _ = component.AnnotateUnlabeledCategoryDataComponent( 40 | transform_graph=self._transform_graph, 41 | inference_results=self._inference_results, 42 | schema=self._schema, 43 | bq_table_name="gcp_project:bq_database.table", 44 | vocab_label_file="vocab_txt", 45 | filter_threshold=0.1, 46 | table_suffix="%Y", 47 | table_partitioning=False, 48 | ) 49 | 50 | 51 | if __name__ == "__main__": 52 | tf.test.main() 53 | -------------------------------------------------------------------------------- /tfx_addons/sampling/README.md: -------------------------------------------------------------------------------- 1 | # Sampler component 2 | 3 | A TFX component to sample examples. 4 | 5 | The sampling component wraps an Apache Beam pipeline to process 6 | data in a TFX pipeline. This component loads TFRecord files from 7 | an earlier example artifact, processes the 'train' split by default, 8 | samples the split by a given label's classes, and stores the new 9 | set of sampled examples in its own example artifact in 10 | TFRecord format. 11 | 12 | The sampling is a probabilistic approximation rather than an exact 13 | resampling, so on small datasets the resulting class balance may 14 | deviate noticeably from the target. 15 | 16 | By default, the component will ignore all examples with a null value 17 | (more precisely, a value that evaluates to False) for the given label, 18 | although more values can be added as necessary. Additionally, it will 19 | copy all non-'train' splits, though this behavior can be changed as well. 20 | The component will save the examples in a user-specified number of files, 21 | and it can be given a custom name as well. 22 | 23 | ## Example usage 24 | ``` 25 | import tfx_addons as tfxa 26 | 27 | under = tfxa.sampling.Sampler( 28 | examples=example_gen.outputs['examples'], 29 | sampling_strategy=tfxa.sampling.SamplingStrategy.UNDERSAMPLE 30 | ) 31 | ``` 32 | 33 | Component `outputs` contains: 34 | - `sampled_examples`: Channel of type `standard_artifacts.Examples` for 35 | materialized sampled examples, based on the input splits, which includes 36 | copied splits unless otherwise specified by `copy_others`. 37 | 38 | [Initial Proposal](https://github.com/tensorflow/tfx-addons/blob/main/proposals/20210721-sampling_component.md) 39 | -------------------------------------------------------------------------------- /tfx_addons/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Sampling component""" 16 | __all__ = ["Sampler", "SamplingStrategy"] 17 | 18 | from .component import Sampler 19 | from .spec import SamplingStrategy 20 | -------------------------------------------------------------------------------- /tfx_addons/sampling/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Component test for the sampling component.""" 16 | 17 | import tensorflow as tf 18 | from absl.testing import absltest 19 | from tfx.types import artifact_utils, channel_utils, standard_artifacts 20 | from tfx.utils import json_utils 21 | 22 | from tfx_addons.sampling import component, spec 23 | 24 | 25 | class ComponentTest(absltest.TestCase): 26 | def testConstruct(self): 27 | examples = standard_artifacts.Examples() 28 | examples.split_names = artifact_utils.encode_split_names(['train', 'eval']) 29 | params = { 30 | spec.SAMPLER_INPUT_KEY: channel_utils.as_channel([examples]), 31 | spec.SAMPLER_SPLIT_KEY: ['train'], 32 | spec.SAMPLER_LABEL_KEY: 'label' 33 | } 34 | 35 | under = component.Sampler(**params) 36 | 37 | self.assertEqual(standard_artifacts.Examples.TYPE_NAME, 38 | under.outputs[spec.SAMPLER_OUTPUT_KEY].type_name) 39 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_SPLIT_KEY], 40 | json_utils.dumps(['train'])) 41 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_LABEL_KEY], 42 | 'label') 43 | 44 | def testConstructWithOptions(self): 45 | examples = standard_artifacts.Examples() 46 | examples.split_names = artifact_utils.encode_split_names(['train', 'eval']) 47 | params = { 48 | spec.SAMPLER_INPUT_KEY: channel_utils.as_channel([examples]), 49 | spec.SAMPLER_LABEL_KEY: 'test_label', 50 | spec.SAMPLER_SPLIT_KEY: ['train', 'eval'], 51 | spec.SAMPLER_COPY_KEY: False, 52 | spec.SAMPLER_SHARDS_KEY: 10, 53 | spec.SAMPLER_CLASSES_KEY: ['label'] 54 | } 55 | 56 | under = component.Sampler(**params) 57 | 58 | self.assertEqual(standard_artifacts.Examples.TYPE_NAME, 59 | under.outputs[spec.SAMPLER_OUTPUT_KEY].type_name) 60 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_LABEL_KEY], 61 | 'test_label') 62 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_SPLIT_KEY], 63 | json_utils.dumps(['train', 'eval'])) 64 | 
self.assertEqual(under.spec.exec_properties[spec.SAMPLER_COPY_KEY], False) 65 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_SHARDS_KEY], 10) 66 | self.assertEqual(under.spec.exec_properties[spec.SAMPLER_CLASSES_KEY], 67 | json_utils.dumps(['label'])) 68 | 69 | 70 | if __name__ == '__main__': 71 | tf.test.main() 72 | -------------------------------------------------------------------------------- /tfx_addons/sampling/data/example_gen/Split-eval/data_tfrecord-00000-of-00001.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/tfx-addons/724a2c095cc9aeb868b45ecf40a9c8832c94caaf/tfx_addons/sampling/data/example_gen/Split-eval/data_tfrecord-00000-of-00001.gz -------------------------------------------------------------------------------- /tfx_addons/sampling/data/example_gen/Split-train/data_tfrecord-00000-of-00001.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/tfx-addons/724a2c095cc9aeb868b45ecf40a9c8832c94caaf/tfx_addons/sampling/data/example_gen/Split-train/data_tfrecord-00000-of-00001.gz -------------------------------------------------------------------------------- /tfx_addons/sampling/data/test_data.csv: -------------------------------------------------------------------------------- 1 | label,col1 2 | ,2 3 | ,2 4 | ,2 5 | ,2 6 | ,2 7 | ,2 8 | ,2 9 | ,2 10 | ,2 11 | ,2 12 | 1,1 13 | 1,1 14 | 1,1 15 | 1,1 16 | 1,1 17 | 1,1 18 | 1,1 19 | 1,1 20 | 1,1 21 | 1,1 22 | 1,1 23 | 1,1 24 | 1,1 25 | 1,1 26 | 1,1 27 | 1,1 28 | 1,1 29 | 1,1 30 | 1,1 31 | 1,1 32 | 1,1 33 | 1,1 34 | 0,0 35 | 0,0 36 | 0,0 37 | 0,0 38 | 0,0 39 | 0,0 40 | 0,0 41 | 0,0 42 | 0,0 43 | 0,0 44 | 0,0 45 | 0,0 46 | 0,0 47 | 0,0 48 | 0,0 49 | 0,0 50 | 0,0 51 | 0,0 -------------------------------------------------------------------------------- /tfx_addons/sampling/example/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/sampling/spec.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Sampling component definition.""" 16 | 17 | import enum 18 | 19 | from tfx import types 20 | from tfx.types import standard_artifacts 21 | from tfx.types.component_spec import ChannelParameter, ExecutionParameter 22 | 23 | SAMPLER_INPUT_KEY = 'input_data' 24 | SAMPLER_OUTPUT_KEY = 'output_data' 25 | SAMPLER_LABEL_KEY = 'label' 26 | SAMPLER_SPLIT_KEY = 'splits' 27 | SAMPLER_COPY_KEY = 'copy_others' 28 | SAMPLER_SHARDS_KEY = 'shards' 29 | SAMPLER_CLASSES_KEY = 'null_classes' 30 | SAMPLER_SAMPLE_KEY = 'sampling_strategy' 31 | 32 | 33 | class SamplingStrategy(enum.IntEnum): 34 | """Determines which kind of sampling to perform.""" 35 | UNDERSAMPLE = 1 36 | OVERSAMPLE = 2 37 | 38 | 39 | class SamplerSpec(types.ComponentSpec): 40 | """Sampling component spec.""" 41 | 42 | PARAMETERS = { 43 | SAMPLER_LABEL_KEY: ExecutionParameter(type=str), 44 | SAMPLER_SPLIT_KEY: ExecutionParameter(type=str, optional=True), 45 | SAMPLER_COPY_KEY: ExecutionParameter(type=int, optional=True), 46 | SAMPLER_SHARDS_KEY: ExecutionParameter(type=int, optional=True), 47 | SAMPLER_CLASSES_KEY: ExecutionParameter(type=str, optional=True), 48 | SAMPLER_SAMPLE_KEY: ExecutionParameter(type=int, optional=True), 49 | } 50 | INPUTS = { 51 | SAMPLER_INPUT_KEY: ChannelParameter(type=standard_artifacts.Examples), 52 | } 53 | OUTPUTS = { 54 | SAMPLER_OUTPUT_KEY: ChannelParameter(type=standard_artifacts.Examples), 55 | } 56 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## Directory Structure 4 | The repo contains three main directories as follows: 5 | - **[Component](./component):** Contains the main component code, with a separate file for the executor code 6 | - **[Data](./data):** Contains the sample data to be used for testing 7 | - **[Example](./example):** Contains example code to test the component with the CSVs present in [data](./data) 8 | 9 | ## A few Git and GitHub practices 10 | 11 | ### Commits 12 | Commits serve as checkpoints during your workflow and can be used to **revert** in case something goes wrong. 13 | - **When to commit:** Try not to pile up too many changes into a single commit, while also ensuring that you don't make too many commits to fix a small issue. 14 | - **Commit messages:** Commit messages should be descriptive enough for an external person to get an idea of what the commit accomplished, while ensuring they don't exceed 50 characters. 15 | 16 | Check out [this gist](https://gist.github.com/turbo/efb8d57c145e00dc38907f9526b60f17) for more information about good practices. 17 | 18 | ### Branches 19 | Branches are a good way to work on different features simultaneously. Check out [git-scm](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) to learn more about the concepts involved. 20 | 21 | For descriptive branch names, it is a good idea to use the following format: 22 | **`name/keyword/short-description`** 23 | - **Name:** Name of the person/s working on the branch. This can be omitted if many people (>2) are expected to work on it. 24 | - **Keyword:** This describes what "type" of work this branch is supposed to do.
These are typically named as: 25 | - `feature`: Adding/expanding a feature 26 | - `base`: Adding boilerplate/readme/templates etc. 27 | - `bug`: Fixes a bug 28 | - `junk`: Throwaway branch created to experiment 29 | - **Short description:** As the name suggests, this contains a short description of the branch, usually no longer than 2-3 words separated by a hyphen (`-`). 30 | 31 | P.S. If multiple branches are being used to work on the same issue (say issue `#n`), they can be named as `name/keyword/#n-short-description` 32 | 33 | ### Issues 34 | The following points should be considered while creating new issues: 35 | - Use relevant labels like `bug`, `feature` etc. 36 | - If the team has decided who will work on an issue, it should be **assigned** to that person as soon as possible to prevent the same work being done twice. 37 | - The issue should be linked in the **project** if needed, and its status should be kept up to date as the work progresses. 38 | 39 | ### Pull Requests 40 | It is always a good idea to ensure the following are present in your Pull Request description: 41 | - Relevant issue/s 42 | - What it accomplishes 43 | - Mention `[WIP]` in the title and make it a `Draft Pull Request` if it is a work in progress 44 | - Once the pull request is final, it should be **requested for review** from the concerned people 45 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/README.md: -------------------------------------------------------------------------------- 1 | # Schema Curation Custom Component 2 | 3 | [![Python](https://img.shields.io/pypi/pyversions/tfx.svg?style=plastic)](https://github.com/tensorflow/tfx) 4 | [![TensorFlow](https://img.shields.io/badge/TFX-orange)](https://www.tensorflow.org/tfx) 5 | 6 | This is a TFX component that allows its users to apply user code to a schema produced by the [SchemaGen](https://www.tensorflow.org/tfx/guide/schemagen) component and curate it based on domain knowledge. It fits seamlessly into an ML pipeline made with TFX, and allows schema manipulation based on a module file provided by the user. 7 | 8 | ## Usage 9 | ### Examples demonstrating how to use the schema curation component 10 | 11 | To run locally: [taxi_example_local.py](https://github.com/rcrowe-google/schemacomponent/blob/main/example/taxi_example_local.py) 12 | To run in Colab: [taxi_example_colab.ipynb](https://github.com/rcrowe-google/schemacomponent/blob/main/example/taxi_example_colab.ipynb) 13 | 14 | ## Documentation 15 | 16 | ### Inputs: 17 | The custom component takes as input the user *module file* and the *schema* generated by the SchemaGen component on the specified data. 18 | 19 | ### Output: 20 | On running the component, it outputs the *modified schema* based on the code provided in the module file. 21 | 22 | ## Module file 23 | 24 | ### The Schema Curation *schema_fn*: 25 | The Schema Curation component provides a way to curate the schema based on user knowledge. As a user, you only have to define a single function called `schema_fn`. In `schema_fn` you define a series of operations that manipulate the input schema to produce the required one. 26 | 27 | An example is: 28 | 29 | ``` 30 | def schema_fn(schema): 31 | """modifies the inferred schema.
32 | Args: 33 | schema: schema generated by the SchemaGen component of TFX 34 | """ 35 | # changing "tips" into an optional feature 36 | feature = tfdv.get_feature(schema, 'tips') 37 | feature.presence.min_fraction = 0.9 38 | 39 | return schema 40 | ``` 41 | 42 | ## Project Structure 43 | 44 | ### Directory Structure 45 | ``` 46 | schemacomponent 47 | ├── component 48 | │ ├── component.py 49 | │ ├── component_test.py 50 | │ ├── executor.py 51 | │ ├── executor_test.py 52 | │ ├── __init__.py 53 | ├── CONTRIBUTING.md 54 | ├── example 55 | │ ├── __init__.py 56 | │ ├── module_file.py 57 | │ ├── taxi_example_colab.ipynb 58 | │ ├── taxi_example_local.py 59 | ├── __init__.py 60 | ├── PROPOSAL.md 61 | └── README.md 62 | ``` 63 | 64 | 65 | The project follows the structure specified by the [TFX](https://www.tensorflow.org/tfx) documentation for a [TFX fully custom component](https://www.tensorflow.org/tfx/guide/custom_component). 66 | 67 | The `SchemaCurationSpec` class defines the input, output and execution parameters required by the component. 68 | 69 | The `Executor` class, a subclass of `base_executor.BaseExecutor`, defines the behavior of the component by overriding the `Do` function. 70 | 71 | Finally, the `SchemaCuration` class integrates the fully custom component into the ML pipeline. 72 | 73 | ### Unit Tests 74 | 75 | Separate unit tests are included for the component and the executor. 76 | 77 | 78 | ## Credits 79 | 80 | The Schema Curation Custom Component was made as a part of [TFX-Addons](https://github.com/tensorflow/tfx-addons/) through the [Outreachy](https://www.outreachy.org/outreachy-may-2021-internship-round/communities/tensorflow/#create-custom-components-and-tools-for-tensorflow-) program. You may view the linked Pull Request in TFX-Addons [here](https://github.com/tensorflow/tfx-addons/pull/32) and the issue [here](https://github.com/tensorflow/tfx-addons/issues/8) for relevant discussions related to the project. 81 | 82 | ## The Team: 83 | ### Mentors: 84 | - Robert Crowe 85 | - Thea Lamkin 86 | - Josh Gordon 87 | 88 | ### Interns: 89 | - [Pratishtha Abrol](https://github.com/pratishtha-abrol) **(Team Leader)** 90 | - [Fatimah Adwan](https://github.com/FatimahAdwan/) 91 | - [Kshitijaa Jaglan](https://github.com/deutranium/) 92 | - [Nirzari Gupta](https://github.com/Nirzu97)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/component/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/component/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """TFX Schema Curation Custom Component 16 | """ 17 | 18 | from typing import Optional, Text, Union 19 | 20 | from tfx import types 21 | from tfx.dsl.components.base import base_component, executor_spec 22 | from tfx.orchestration import data_types 23 | from tfx.types import standard_artifacts 24 | from tfx.types.component_spec import ChannelParameter, ExecutionParameter 25 | 26 | from tfx_addons.schema_curation.component import executor 27 | 28 | 29 | class SchemaCurationSpec(types.ComponentSpec): 30 | """ComponentSpec for TFX Schema Curation Custom Component.""" 31 | 32 | PARAMETERS = { 33 | 'module_file': ExecutionParameter(type=str, optional=True), 34 | 'module_path': ExecutionParameter(type=str, optional=True), 35 | 'schema_fn': ExecutionParameter(type=str, optional=True) 36 | } 37 | INPUTS = { 38 | 'schema': 39 | ChannelParameter(type=standard_artifacts.Schema 40 | ), # Schema artifact produced by the SchemaGen component 41 | } 42 | OUTPUTS = { 43 | 'custom_schema': 44 | ChannelParameter(type=standard_artifacts.Schema 45 | ) # Curated schema produced by this component 46 | } 47 | 48 | 49 | class SchemaCuration(base_component.BaseComponent): 50 | """Custom TFX Schema Curation Component. 51 | 52 | The SchemaCuration component is used to apply user code to a schema 53 | generated by SchemaGen in order to curate the schema based on 54 | domain knowledge. 55 | 56 | Component `outputs` contains: 57 | - `custom_schema`: Channel of type `standard_artifacts.Schema` 58 | """ 59 | 60 | SPEC_CLASS = SchemaCurationSpec 61 | EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor) 62 | 63 | def __init__( 64 | self, 65 | schema: types.Channel, 66 | module_file: Optional[Union[Text, data_types.RuntimeParameter]] = None, 67 | module_path: Optional[Union[Text, data_types.RuntimeParameter]] = None, 68 | schema_fn: Optional[Union[Text, data_types.RuntimeParameter]] = None): 69 | """Construct a SchemaCuration component. 70 | 71 | Args: 72 | schema: A Channel of type `standard_artifacts.Schema`, typically the 73 | schema generated by the SchemaGen component of TFX. 74 | module_file: Optional path to the module file that defines the 75 | user-provided `schema_fn` used to curate the schema. 76 | """ 77 | 78 | custom_schema = types.Channel(type=standard_artifacts.Schema) 79 | 80 | spec = SchemaCurationSpec(schema=schema, 81 | custom_schema=custom_schema, 82 | module_file=module_file, 83 | module_path=module_path, 84 | schema_fn=schema_fn) 85 | super().__init__(spec=spec) 86 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/component/component_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for TFX Schema Curation Custom Component.""" 16 | 17 | import tensorflow as tf 18 | from tfx.types import channel_utils, standard_artifacts 19 | 20 | from tfx_addons.schema_curation.component import component 21 | 22 | 23 | class SchemaCurationTest(tf.test.TestCase): 24 | def testConstruct(self): 25 | schema_curation = component.SchemaCuration(schema=channel_utils.as_channel( 26 | [standard_artifacts.Schema()]), ) 27 | self.assertEqual(standard_artifacts.Schema.TYPE_NAME, 28 | schema_curation.outputs['custom_schema'].type_name) 29 | 30 | 31 | if __name__ == '__main__': 32 | tf.test.main() 33 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/example/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/example/module_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Supplement for chicago taxi pipeline example with specifics schema modification. 16 | This module file will be used in the custom schema curation component. 17 | """ 18 | 19 | import tensorflow_data_validation as tfdv 20 | 21 | # TFX schema curation component will call this function. 22 | 23 | 24 | def schema_fn(schema): 25 | """modifies the infered schema. 26 | Args: 27 | schema:schema generated by SchemaGen component of tfx 28 | """ 29 | #changing "tips" into optional feature 30 | feature = tfdv.get_feature(schema, 'tips') 31 | feature.presence.min_fraction = 0.9 32 | 33 | return schema 34 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/example/taxi_example_local.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Chicago taxi example using TFX schema curation custom component. 16 | base code taken from: https://github.com/tensorflow/tfx/blob/master/tfx/examples/custom_components/hello_world/example/taxi_pipeline_hello.py 17 | 18 | This example demonstrate the use of schema curation custom component. 19 | user defined function `schema_fn` defined in `module_file.py` is used 20 | to change feature `tips` from required to optional. 21 | 22 | """ 23 | 24 | import os 25 | import tempfile 26 | import urllib 27 | from typing import Text 28 | 29 | import absl 30 | import tfx 31 | from tfx.components import CsvExampleGen, SchemaGen, StatisticsGen 32 | from tfx.orchestration import metadata, pipeline 33 | from tfx.orchestration.local import local_dag_runner 34 | 35 | from tfx_addons.schema_curation.component import component 36 | 37 | # downloading data and setting up required paths 38 | _data_root = tempfile.mkdtemp(prefix='tfx-data') 39 | DATA_PATH = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv' 40 | _data_filepath = os.path.join(_data_root, "data.csv") 41 | urllib.request.urlretrieve(DATA_PATH, _data_filepath) 42 | 43 | _pipeline_name = 'taxi_pipeline' 44 | _tfx_root = tfx.__path__[0] 45 | _pipeline_root = os.path.join(_tfx_root, 'pipelines', _pipeline_name) 46 | _metadata_path = os.path.join(_tfx_root, 'metadata', _pipeline_name, 47 | 'metadata.db') 48 | 49 | 50 | def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text, 51 | metadata_path: Text) -> pipeline.Pipeline: 52 | """Implements the chicago taxi pipeline with TFX.""" 53 | 54 | # Brings data into the pipeline or otherwise joins/converts training data. 55 | example_gen = CsvExampleGen(input_base=data_root) 56 | 57 | # Computes statistics over data for visualization and example validation. 
58 | statistics_gen = StatisticsGen(examples=example_gen.outputs['examples']) 59 | 60 | # inferes a schema 61 | schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'], 62 | infer_feature_shape=True) 63 | 64 | # modifies infered schema with use of udf `schema_fn` defined in module file 65 | schema_curation = component.SchemaCuration( 66 | schema=schema_gen.outputs['schema'], 67 | module_file=os.path.join('schemacomponent', 'example', 'module_file.py')) 68 | 69 | return pipeline.Pipeline( 70 | pipeline_name=pipeline_name, 71 | pipeline_root=pipeline_root, 72 | components=[example_gen, statistics_gen, schema_gen, schema_curation], 73 | enable_cache=True, 74 | metadata_connection_config=metadata.sqlite_metadata_connection_config( 75 | metadata_path)) 76 | 77 | 78 | # To run this pipeline from the python CLI: 79 | # $python taxi_pipeline_hello.py 80 | if __name__ == '__main__': 81 | absl.logging.set_verbosity(absl.logging.INFO) 82 | local_dag_runner.LocalDagRunner().run( 83 | _create_pipeline(pipeline_name=_pipeline_name, 84 | pipeline_root=_pipeline_root, 85 | data_root=_data_root, 86 | metadata_path=_metadata_path)) 87 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/test_data/module_file/module_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Supplement for chicago taxi pipeline example with specifics schema modification. 16 | This module file will be used in the custom schema curation component. 17 | """ 18 | 19 | import tensorflow_data_validation as tfdv 20 | 21 | # TFX schema curation component will call this function. 22 | 23 | 24 | def schema_fn(schema): 25 | """modifies the infered schema. 26 | Args: 27 | schema:schema generated by SchemaGen component of tfx 28 | """ 29 | #changing "tips" into optional feature 30 | feature = tfdv.get_feature(schema, 'tips') 31 | feature.presence.min_fraction = 0.9 32 | 33 | return schema 34 | -------------------------------------------------------------------------------- /tfx_addons/schema_curation/test_data/schema_gen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tfx_addons/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ Util functions to assist with the TFX Addons tests """ 16 | 17 | from typing import List 18 | 19 | 20 | def get_tfx_version(version: str) -> List[int]: 21 | """ 22 | Returns the TFX version as integers. 23 | """ 24 | # NB(gcasassaez): Remove suffix to avoid parsing issues 25 | version = version.split("-")[0] 26 | return tuple([int(x) for x in version.split(".")]) # pylint: disable=R1728 27 | -------------------------------------------------------------------------------- /tfx_addons/utils/test_utils_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for TFX Addons test util functions.""" 16 | 17 | import unittest 18 | 19 | from tfx_addons.utils import test_utils 20 | 21 | MESSAGE_FN_CALLED = "test_fn called" 22 | EXPECTED_WARNING_MESSAGE = ( 23 | "WARNING:absl:test_fn has been disabled due to incompatible TFX version.") 24 | 25 | 26 | def test_fn(): 27 | return MESSAGE_FN_CALLED 28 | 29 | 30 | class TestUtilTest(unittest.TestCase): 31 | def test_get_tfx_version(self): 32 | tfx_version = "1.4.0" 33 | self.assertEqual(test_utils.get_tfx_version(tfx_version), (1, 4, 0)) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /tfx_addons/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Define TFX Addons version information.""" 16 | 17 | # We follow Semantic Versioning (https://semver.org/) 18 | _MAJOR_VERSION = "0" 19 | _MINOR_VERSION = "7" 20 | _PATCH_VERSION = "0" 21 | 22 | # When building releases, we can update this value on the release branch to 23 | # reflect the current release candidate ('rc0', 'rc1') or, finally, the official 24 | # stable release (indicated by `_VERSION_SUFFIX = ''`). Outside the context of a 25 | # release branch, the current version is by default assumed to be a 26 | # 'development' version, labeled 'dev'. 27 | _VERSION_SUFFIX = "dev" 28 | 29 | # Example, '0.1.0-dev' 30 | __version__ = ".".join([_MAJOR_VERSION, _MINOR_VERSION, _PATCH_VERSION]) 31 | if _VERSION_SUFFIX: 32 | __version__ = "{}-{}".format(__version__, _VERSION_SUFFIX) 33 | 34 | # Required TFX version [min, max) 35 | _INCLUSIVE_MIN_TFX_VERSION = "1.6.0" 36 | _EXCLUSIVE_MAX_TFX_VERSION = "1.11.0" 37 | _TFXVERSION_CONSTRAINT = ( 38 | f">={_INCLUSIVE_MIN_TFX_VERSION},<{_EXCLUSIVE_MAX_TFX_VERSION}") 39 | _CI_MAX_CONSTRAINTS = ["tfx~=1.10.0", "tensorflow~=2.9.0"] 40 | _CI_MIN_CONSTRAINTS = [ 41 | f"tfx~={_INCLUSIVE_MIN_TFX_VERSION}", 42 | "tensorflow~=2.8.0", 43 | ] 44 | # This is a list of officially maintained projects with their dependencies. 45 | # Any project added here will be automatically picked up on release. 46 | # - Key: Project name that corresponds to folder tfx_addons.{} namespace. 47 | # - Value: Python dependencies needed for project to work. 48 | _PKG_METADATA = { 49 | # Add dependencies here for your project. Avoid using install_requires. 
50 | "mlmd_client": [ 51 | f"ml_pipelines_sdk{_TFXVERSION_CONSTRAINT}", 52 | f"ml_metadata{_TFXVERSION_CONSTRAINT}" 53 | ], 54 | "schema_curation": [ 55 | f"tfx{_TFXVERSION_CONSTRAINT}", 56 | ], 57 | "feature_selection": 58 | [f"tfx{_TFXVERSION_CONSTRAINT}", "scikit_learn>=1.0.2,<2.0.0"], 59 | "feast_examplegen": [ 60 | f"tfx{_TFXVERSION_CONSTRAINT}", 61 | # ToDo(gcasassaez): Relax this once we stop supporting python 3.7 62 | # feast>=0.23 upgrades to numpy>=1.22 which does not work on 3.7 63 | "feast>=0.21.3,<0.23.0", 64 | ], 65 | "xgboost_evaluator": [ 66 | f"tfx{_TFXVERSION_CONSTRAINT}", 67 | "xgboost>=1.0.0", 68 | ], 69 | "sampling": [f"tfx{_TFXVERSION_CONSTRAINT}", "tensorflow>=2.0.0"], 70 | "message_exit_handler": [ 71 | f"tfx{_TFXVERSION_CONSTRAINT}", 72 | "kfp>=1.8,<2.0", 73 | "slackclient>=2.9.0,<3.0", 74 | "pydantic>=1.8.0,<2.0", 75 | ], 76 | "pandas_transform": [f"tfx{_TFXVERSION_CONSTRAINT}", "pandas>=1.0.0,<2.0"], 77 | "firebase_publisher": 78 | [f"tfx{_TFXVERSION_CONSTRAINT}", "firebase-admin>=5.0.0,<6.0.0"], 79 | "huggingface_pusher": 80 | [f"tfx{_TFXVERSION_CONSTRAINT}", "huggingface-hub>=0.10.0,<1.0.0"], 81 | "model_card_generator": 82 | [f"tfx{_TFXVERSION_CONSTRAINT}", "model-card-toolkit>=2.0.0,<3.0.0"], 83 | "predictions_to_bigquery": [f"tfx{_TFXVERSION_CONSTRAINT}"], 84 | "copy_example_gen": [ 85 | f"tfx{_TFXVERSION_CONSTRAINT}", 86 | ], 87 | } 88 | -------------------------------------------------------------------------------- /tfx_addons/xgboost_evaluator/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Current Version (Still in Development) 2 | 3 | ### Last Update: 15 September 2021 4 | 5 | ## Major Features and Improvements 6 | 7 | * None at this time 8 | 9 | ## Breaking Changes 10 | 11 | * None at this time 12 | 13 | ## Deprecations 14 | 15 | * None at this time 16 | 17 | ## Bug Fixes and Other Changes 18 | 19 | * None at this time 20 | 21 | ## Documentation Updates 22 | 23 | * None at this time -------------------------------------------------------------------------------- /tfx_addons/xgboost_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """XGBoost evaluator module""" 16 | __all__ = ["XGBoostEvaluator"] 17 | from .component import XGBoostEvaluator 18 | -------------------------------------------------------------------------------- /tfx_addons/xgboost_evaluator/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """XGBoost Evaluator component.""" 16 | 17 | from tfx import v1 as tfx 18 | 19 | from tfx_addons.xgboost_evaluator import xgboost_predict_extractor 20 | 21 | 22 | class XGBoostEvaluator(tfx.components.Evaluator): 23 | """A custom Evaluator component for XGBoost models. It behaves like the standard 24 | Evaluator, except that it supplies the custom module file containing the XGBoost prediction extractor.""" 25 | def __init__(self, **kwargs): 26 | if 'module_file' in kwargs: 27 | raise ValueError('XGBoostEvaluator does not accept a custom module_file') 28 | super().__init__(module_file=xgboost_predict_extractor.get_module_file(), 29 | **kwargs) 30 | --------------------------------------------------------------------------------
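Usage note: a minimal sketch of how the XGBoostEvaluator above could be wired into a TFX pipeline. The `example_gen` and `trainer` arguments, the 'tips' label key, and the metric choice are illustrative assumptions rather than values taken from this repository; the one repository-specific detail is that no `module_file` is passed, because the component injects its own XGBoost prediction extractor.

import tensorflow_model_analysis as tfma

from tfx_addons.xgboost_evaluator import XGBoostEvaluator


def make_xgboost_evaluator(example_gen, trainer):
  """Builds an XGBoostEvaluator wired to upstream pipeline components.

  `example_gen` and `trainer` are assumed to be components created elsewhere
  in the surrounding pipeline; the eval config below is a placeholder.
  """
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='tips')],  # hypothetical label key
      slicing_specs=[tfma.SlicingSpec()],  # overall (unsliced) metrics
      metrics_specs=[
          tfma.MetricsSpec(
              metrics=[tfma.MetricConfig(class_name='ExampleCount')])
      ])
  # No module_file argument here: XGBoostEvaluator supplies its own.
  return XGBoostEvaluator(examples=example_gen.outputs['examples'],
                          model=trainer.outputs['model'],
                          eval_config=eval_config)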