├── .env.template ├── .flake8 ├── .github └── workflows │ ├── docker.yml │ ├── docker_test.yml │ └── pr.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── data └── openimage_10.txt ├── flex └── metadata.json ├── my_project ├── __init__.py ├── config.py ├── pipeline.py └── run.py ├── pyproject.toml ├── pytorch_gpu.Dockerfile ├── requirements.dev.txt ├── requirements.prod.txt ├── scripts ├── check-beam.sh ├── check-pipeline.sh ├── check-tf-on-gpu.sh ├── check-torch-on-gpu.sh ├── create-gpu-vm.sh └── get_beam_version.py ├── setup.py ├── tensor_rt.Dockerfile ├── tensorflow_gpu.Dockerfile ├── tensorflow_gpu.flex.Dockerfile └── tests ├── sample.env.pytorch ├── sample.env.tf └── test_pipeline.py /.env.template: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ### PYTHON/SDK/DOCKER SETTINGS 3 | ################################################################################ 4 | ##Pytorch + Py3.10 + Beam 2.47.0 5 | PYTHON_VERSION=3.10 6 | BEAM_VERSION=2.47.0 7 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile 8 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev" 9 | ##Pytorch + Tensor_RT + Py3.8 + Beam 2.46.0 10 | #PYTHON_VERSION=3.8 11 | #BEAM_VERSION=2.46.0 12 | #DOCKERFILE_TEMPLATE=tensor_rt.Dockerfile 13 | ################################################################################ 14 | ### GCP SETTINGS 15 | ################################################################################ 16 | PROJECT_ID=your-gcp-project-id 17 | REGION=your-region-to-run-dataflow-jobs 18 | ZONE=your-zone-to-run-vm 19 | DISK_SIZE_GB=50 20 | MACHINE_TYPE=n1-standard-2 21 | VM_NAME=beam-ml-starter-gpu 22 | ################################################################################ 23 | ### DATAFLOW JOB SETTINGS 24 | ################################################################################ 25 | STAGING_LOCATION=your-gcs-bucket-for-staging-files 26 | TEMP_LOCATION=your-gcs-bucket-for-temp-files 27 | CUSTOM_CONTAINER_IMAGE=your-gcr-image-uri-for-custom-container 28 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver" 29 | ################################################################################ 30 | ### DATAFLOW JOB MODEL SETTINGS 31 | ################################################################################ 32 | ### PYTORCH MODEL EXAMPLES 33 | ## mobilenet_v2 34 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth" 35 | MODEL_NAME=mobilenet_v2 36 | ## resnet101 37 | #MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.resnet101.pth" 38 | #MODEL_NAME=resnet101 39 | ### TF MODEL URI EXAMPLES 40 | #TF_MODEL_URI: only support TF2 models (https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2) 41 | #TF_MODEL_URI=https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4 42 | ################################################################################ 43 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS 44 | ################################################################################ 45 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt" 46 | OUTPUT_DATA=your-gcs-bucket-for-saving-prediction-results -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 
2 | max-line-length = 120 3 | max-complexity = 40 4 | ignore = 5 | E203 6 | W503 7 | exclude = 8 | .eggs 9 | .git 10 | .tox 11 | __pycache__ 12 | build 13 | dist 14 | venv -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Build and push Docker image to GCP Artifact Registry 16 | 17 | on: 18 | workflow_dispatch: 19 | push: 20 | branches: 21 | - main 22 | schedule: 23 | # Every Monday at 1PM UTC (9AM EST) 24 | - cron: "0 13 * * 1" 25 | 26 | jobs: 27 | build-and-push: 28 | runs-on: ubuntu-latest 29 | 30 | steps: 31 | - name: Free Disk Space (Ubuntu) 32 | uses: jlumbroso/free-disk-space@main 33 | with: 34 | # this might remove tools that are actually needed, 35 | # if set to "true" but frees about 6 GB 36 | tool-cache: false 37 | 38 | android: true 39 | dotnet: true 40 | haskell: true 41 | large-packages: false 42 | docker-images: true 43 | swap-storage: true 44 | - name: Checkout 45 | uses: actions/checkout@v3 46 | - id: "auth" 47 | name: Authenticate to Google Cloud 48 | uses: google-github-actions/auth@v1.1.1 49 | with: 50 | credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }} 51 | token_format: access_token 52 | - name: Docker login 53 | uses: "docker/login-action@v1" 54 | with: 55 | registry: "us-docker.pkg.dev" 56 | username: "oauth2accesstoken" 57 | password: "${{ steps.auth.outputs.access_token }}" 58 | - name: Set up Python 3.8 59 | uses: actions/setup-python@v4 60 | with: 61 | python-version: "3.8" 62 | - name: Init env 63 | run: | 64 | cp tests/sample.env.tf .env 65 | echo '${{ steps.auth.outputs.access_token }}' | docker login -u oauth2accesstoken --password-stdin https://us-docker.pkg.dev 66 | make init 67 | - name: Build and push Docker image 68 | run: | 69 | make docker 70 | - name: Test Docker image 71 | run: | 72 | make run-df-gpu 73 | -------------------------------------------------------------------------------- /.github/workflows/docker_test.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | name: Build and push Docker image to GCP Artifact Registry with the latest Beam 16 | 17 | on: 18 | workflow_dispatch: 19 | 20 | jobs: 21 | build-and-push: 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - name: Free Disk Space (Ubuntu) 26 | uses: jlumbroso/free-disk-space@main 27 | with: 28 | # this might remove tools that are actually needed, 29 | # if set to "true" but frees about 6 GB 30 | tool-cache: false 31 | 32 | android: true 33 | dotnet: true 34 | haskell: true 35 | large-packages: false 36 | docker-images: true 37 | swap-storage: true 38 | - name: Checkout 39 | uses: actions/checkout@v3 40 | - id: "auth" 41 | name: Authenticate to Google Cloud 42 | uses: google-github-actions/auth@v1.1.1 43 | with: 44 | credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }} 45 | token_format: access_token 46 | - name: Docker login 47 | uses: "docker/login-action@v1" 48 | with: 49 | registry: "us-docker.pkg.dev" 50 | username: "oauth2accesstoken" 51 | password: "${{ steps.auth.outputs.access_token }}" 52 | - name: Set up Python 3.10 53 | uses: actions/setup-python@v4 54 | with: 55 | python-version: "3.10" 56 | - name: Init env with the test Beam and docker URI 57 | run: | 58 | cp tests/sample.env.pytorch .env 59 | make init-venv 60 | ./venv/bin/pip install requests packaging 61 | make test-latest-env 62 | sed -i '/CUSTOM_CONTAINER_IMAGE=/d' .env 63 | echo -e "\n" >> .env 64 | echo "CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/pytorch_gpu:test-beam" >> .env 65 | echo '${{ steps.auth.outputs.access_token }}' | docker login -u oauth2accesstoken --password-stdin https://us-docker.pkg.dev 66 | make init 67 | - name: Build and push Docker image 68 | run: | 69 | make docker 70 | - name: Test Docker image 71 | run: | 72 | make run-df-gpu 73 | -------------------------------------------------------------------------------- /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Run basic tests with Python 3.8 16 | 17 | on: [push, pull_request, workflow_dispatch] 18 | 19 | jobs: 20 | tests: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 3.8 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: "3.8" 29 | - name: Init env 30 | run: | 31 | cp tests/sample.env.tf .env 32 | make init 33 | - name: Run local tests 34 | run: | 35 | make test 36 | - name: Run DirectRunner with TF 37 | run: | 38 | # tf model 39 | make run-direct 40 | test -f beam-output/beam_test_out.txt && echo "DirectRunner ran successfully!" 
|| $(error "Cannot find beam-output/beam_test_out.txt!") 41 | - name: Run DirectRunner with PyTorch 42 | run: | 43 | # torch model 44 | sed -i '/TF_MODEL_URI=/d' .env 45 | echo -e "\n" >> .env 46 | echo "MODEL_STATE_DICT_PATH=gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth" >> .env 47 | echo -e "\n" >> .env 48 | echo "MODEL_NAME=mobilenet_v2" >> .env 49 | make run-direct 50 | test -f beam-output/beam_test_out.txt && echo "DirectRunner ran successfully!" || $(error "Cannot find beam-output/beam_test_out.txt!") 51 | # restore .env 52 | cp tests/sample.env.tf .env 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # beam temp 16 | beam-temp-* 17 | beam-output/ 18 | Dockerfile 19 | requirements.txt 20 | 21 | # sys 22 | .DS_Store 23 | 24 | # Byte-compiled / optimized / DLL files 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | 29 | # C extensions 30 | *.so 31 | 32 | # Distribution / packaging 33 | .Python 34 | build/ 35 | develop-eggs/ 36 | dist/ 37 | downloads/ 38 | eggs/ 39 | .eggs/ 40 | lib/ 41 | lib64/ 42 | parts/ 43 | sdist/ 44 | var/ 45 | wheels/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | MANIFEST 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .nox/ 66 | .coverage 67 | .coverage.* 68 | .cache 69 | nosetests.xml 70 | coverage.xml 71 | *.cover 72 | *.py,cover 73 | .hypothesis/ 74 | .pytest_cache/ 75 | cover/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | db.sqlite3 85 | db.sqlite3-journal 86 | 87 | # Flask stuff: 88 | instance/ 89 | .webassets-cache 90 | 91 | # Scrapy stuff: 92 | .scrapy 93 | 94 | # Sphinx documentation 95 | docs/_build/ 96 | 97 | # PyBuilder 98 | .pybuilder/ 99 | target/ 100 | 101 | # Jupyter Notebook 102 | .ipynb_checkpoints 103 | 104 | # IPython 105 | profile_default/ 106 | ipython_config.py 107 | 108 | # pyenv 109 | # For a library or package, you might want to ignore these files since the code is 110 | # intended to run in multiple environments; otherwise, check them in: 111 | # .python-version 112 | 113 | # pipenv 114 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 115 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 116 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 117 | # install all needed dependencies. 
118 | #Pipfile.lock 119 | 120 | # poetry 121 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 122 | # This is especially recommended for binary packages to ensure reproducibility, and is more 123 | # commonly ignored for libraries. 124 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 125 | #poetry.lock 126 | 127 | # pdm 128 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 129 | #pdm.lock 130 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 131 | # in version control. 132 | # https://pdm.fming.dev/#use-with-ide 133 | .pdm.toml 134 | 135 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 136 | __pypackages__/ 137 | 138 | # Celery stuff 139 | celerybeat-schedule 140 | celerybeat.pid 141 | 142 | # SageMath parsed files 143 | *.sage.py 144 | 145 | # Environments 146 | .env* 147 | .venv 148 | env/ 149 | venv/ 150 | ENV/ 151 | env.bak/ 152 | venv.bak/ 153 | 154 | # vscode 155 | .vscode 156 | 157 | # Spyder project settings 158 | .spyderproject 159 | .spyproject 160 | 161 | # Rope project settings 162 | .ropeproject 163 | 164 | # mkdocs documentation 165 | /site 166 | 167 | # mypy 168 | .mypy_cache/ 169 | .dmypy.json 170 | dmypy.json 171 | 172 | # Pyre type checker 173 | .pyre/ 174 | 175 | # pytype static type analyzer 176 | .pytype/ 177 | 178 | # Cython debug symbols 179 | cython_debug/ 180 | 181 | # PyCharm 182 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 183 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 184 | # and can be added to the global gitignore or merged into this file. For a more nuclear 185 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 186 | #.idea/ 187 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [settings] 16 | sections=FUTURE,STDLIB,THIRDPARTY,DFML,FIRSTPARTY,LOCALFOLDER 17 | import_heading_dfml=Dataflow ML libraries 18 | import_heading_stdlib=standard libraries 19 | import_heading_thirdparty=third party libraries 20 | include_trailing_comma=True 21 | indent=' ' 22 | known_dfml=my_project 23 | dedup_headings=True 24 | line_length=120 25 | multi_line_output=3 26 | skip=./venv/,./venv-docs/,./.git/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | exclude: ^docs/notebooks/ 16 | repos: 17 | - repo: https://github.com/ambv/black 18 | rev: 23.3.0 19 | hooks: 20 | - id: black 21 | args: ["--config=pyproject.toml", "--check", "--diff"] 22 | - repo: https://github.com/pycqa/flake8 23 | rev: "6.0.0" 24 | hooks: 25 | - id: flake8 26 | args: ["--config=.flake8"] 27 | - repo: https://github.com/timothycrosley/isort 28 | rev: 5.12.0 29 | hooks: 30 | - id: isort -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We would love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit <https://cla.developers.google.com/> to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) 32 | for this purpose. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License.
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | SILENT: 16 | .PHONY: 17 | .DEFAULT_GOAL := help 18 | 19 | # Load environment variables from .env file 20 | TF_MODEL_URI := 21 | include .env 22 | export 23 | 24 | define PRINT_HELP_PYSCRIPT 25 | import re, sys # isort:skip 26 | 27 | matches = [] 28 | for line in sys.stdin: 29 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 30 | if match: 31 | matches.append(match.groups()) 32 | 33 | for target, help in sorted(matches): 34 | print(" %-25s %s" % (target, help)) 35 | endef 36 | export PRINT_HELP_PYSCRIPT 37 | 38 | PYTHON = python$(PYTHON_VERSION) 39 | 40 | ifndef TF_MODEL_URI 41 | MODEL_ENV := "TORCH" 42 | else 43 | MODEL_ENV := "TF" 44 | endif 45 | 46 | help: ## Print this help 47 | @echo 48 | @echo " make targets:" 49 | @echo 50 | @$(PYTHON) -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 51 | 52 | test-latest-env: ## Replace the Beam version with the latest version (including release candidates) 53 | $(eval LATEST_VERSION=$(shell ./venv/bin/python3 scripts/get_beam_version.py)) 54 | @echo $(LATEST_VERSION) 55 | @sed 's/BEAM_VERSION=.*/BEAM_VERSION=$(LATEST_VERSION)/g' .env > .env.new && mv .env.new .env 56 | 57 | init-venv: ## Create virtual environment in venv folder 58 | @$(PYTHON) -m venv venv 59 | 60 | init: init-venv ## Init virtual environment 61 | @./venv/bin/python3 -m pip install -U pip 62 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g" requirements.prod.txt > requirements.txt) 63 | @./venv/bin/python3 -m pip install -r requirements.txt 64 | @./venv/bin/python3 -m pip install -r requirements.dev.txt 65 | @./venv/bin/python3 -m pre_commit install --install-hooks --overwrite 66 | @mkdir -p beam-output 67 | @echo "use 'source venv/bin/activate' to activate venv " 68 | @./venv/bin/python3 -m pip install -e . 69 | 70 | format: ## Run formatter on source code 71 | @./venv/bin/python3 -m black --config=pyproject.toml . 72 | 73 | lint: ## Run linter on source code 74 | @./venv/bin/python3 -m black --config=pyproject.toml --check . 75 | @./venv/bin/python3 -m flake8 --config=.flake8 . 76 | 77 | clean-lite: ## Remove pycache files, pytest files, etc 78 | @rm -rf build dist .cache .coverage .coverage.* *.egg-info 79 | @find . -name .coverage | xargs rm -rf 80 | @find . -name .pytest_cache | xargs rm -rf 81 | @find . -name .tox | xargs rm -rf 82 | @find . -name __pycache__ | xargs rm -rf 83 | @find .
-name *.egg-info | xargs rm -rf 84 | 85 | clean: clean-lite ## Remove virtual environment, downloaded models, etc 86 | @rm -rf venv 87 | @echo "run 'make init'" 88 | 89 | test: lint ## Run tests 90 | ./venv/bin/pytest -s -vv --cov=my_project --cov-fail-under=50 tests/ 91 | 92 | run-direct: ## Run a local test with DirectRunner 93 | @rm -f beam-output/beam_test_out.txt 94 | ifeq ($(MODEL_ENV), "TORCH") 95 | time ./venv/bin/python3 -m my_project.run \ 96 | --input data/openimage_10.txt \ 97 | --output beam-output/beam_test_out.txt \ 98 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \ 99 | --model_name $(MODEL_NAME) 100 | else 101 | time ./venv/bin/python3 -m my_project.run \ 102 | --input data/openimage_10.txt \ 103 | --output beam-output/beam_test_out.txt \ 104 | --tf_model_uri $(TF_MODEL_URI) 105 | endif 106 | 107 | docker: ## Build a custom docker image and push it to Artifact Registry 108 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g; s|\$${PYTHON_VERSION}|$(PYTHON_VERSION)|g" ${DOCKERFILE_TEMPLATE} > Dockerfile) 109 | docker build --platform linux/amd64 -t $(CUSTOM_CONTAINER_IMAGE) -f Dockerfile . 110 | docker push $(CUSTOM_CONTAINER_IMAGE) 111 | 112 | run-df-gpu: ## Run a Dataflow job using the custom container with GPUs 113 | $(eval JOB_NAME := beam-ml-starter-gpu-$(shell date +%s)-$(shell echo $$$$)) 114 | ifeq ($(MODEL_ENV), "TORCH") 115 | time ./venv/bin/python3 -m my_project.run \ 116 | --runner DataflowRunner \ 117 | --job_name $(JOB_NAME) \ 118 | --project $(PROJECT_ID) \ 119 | --region $(REGION) \ 120 | --machine_type $(MACHINE_TYPE) \ 121 | --disk_size_gb $(DISK_SIZE_GB) \ 122 | --staging_location $(STAGING_LOCATION) \ 123 | --temp_location $(TEMP_LOCATION) \ 124 | --setup_file ./setup.py \ 125 | --device GPU \ 126 | --dataflow_service_option $(SERVICE_OPTIONS) \ 127 | --number_of_worker_harness_threads 1 \ 128 | --experiments=disable_worker_container_image_prepull \ 129 | --experiments=use_pubsub_streaming \ 130 | --sdk_container_image $(CUSTOM_CONTAINER_IMAGE) \ 131 | --sdk_location container \ 132 | --input $(INPUT_DATA) \ 133 | --output $(OUTPUT_DATA) \ 134 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \ 135 | --model_name $(MODEL_NAME) 136 | else 137 | time ./venv/bin/python3 -m my_project.run \ 138 | --runner DataflowRunner \ 139 | --job_name $(JOB_NAME) \ 140 | --project $(PROJECT_ID) \ 141 | --region $(REGION) \ 142 | --machine_type $(MACHINE_TYPE) \ 143 | --disk_size_gb $(DISK_SIZE_GB) \ 144 | --staging_location $(STAGING_LOCATION) \ 145 | --temp_location $(TEMP_LOCATION) \ 146 | --setup_file ./setup.py \ 147 | --device GPU \ 148 | --dataflow_service_option $(SERVICE_OPTIONS) \ 149 | --number_of_worker_harness_threads 1 \ 150 | --experiments=disable_worker_container_image_prepull \ 151 | --experiments=use_pubsub_streaming \ 152 | --sdk_container_image $(CUSTOM_CONTAINER_IMAGE) \ 153 | --sdk_location container \ 154 | --input $(INPUT_DATA) \ 155 | --output $(OUTPUT_DATA) \ 156 | --tf_model_uri $(TF_MODEL_URI) 157 | endif 158 | 159 | run-df-cpu: ## Run a Dataflow job with CPUs and without Custom Container 160 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g" requirements.txt > beam-output/requirements.txt) 161 | @$(eval JOB_NAME := beam-ml-starter-cpu-$(shell date +%s)-$(shell echo $$$$)) 162 | ifeq ($(MODEL_ENV), "TORCH") 163 | time ./venv/bin/python3 -m my_project.run \ 164 | --runner DataflowRunner \ 165 | --job_name $(JOB_NAME) \ 166 | --project $(PROJECT_ID) \ 167 | --region $(REGION) \ 168 | --machine_type $(MACHINE_TYPE) \ 169 | 
--disk_size_gb $(DISK_SIZE_GB) \ 170 | --staging_location $(STAGING_LOCATION) \ 171 | --temp_location $(TEMP_LOCATION) \ 172 | --requirements_file requirements.txt \ 173 | --setup_file ./setup.py \ 174 | --input $(INPUT_DATA) \ 175 | --output $(OUTPUT_DATA) \ 176 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \ 177 | --model_name $(MODEL_NAME) 178 | else 179 | time ./venv/bin/python3 -m my_project.run \ 180 | --runner DataflowRunner \ 181 | --job_name $(JOB_NAME) \ 182 | --project $(PROJECT_ID) \ 183 | --region $(REGION) \ 184 | --machine_type $(MACHINE_TYPE) \ 185 | --disk_size_gb $(DISK_SIZE_GB) \ 186 | --staging_location $(STAGING_LOCATION) \ 187 | --temp_location $(TEMP_LOCATION) \ 188 | --requirements_file requirements.txt \ 189 | --setup_file ./setup.py \ 190 | --input $(INPUT_DATA) \ 191 | --output $(OUTPUT_DATA) \ 192 | --tf_model_uri $(TF_MODEL_URI) 193 | endif 194 | 195 | create-vm: ## Create a VM with GPU to test the docker image 196 | @./scripts/create-gpu-vm.sh 197 | 198 | delete-vm: ## Delete a VM 199 | gcloud compute instances delete $(VM_NAME) --project $(PROJECT_ID) --zone $(ZONE) --quiet 200 | 201 | check-beam: ## Check whether Beam is installed on GPU using VM with Custom Container 202 | @./scripts/check-beam.sh 203 | 204 | check-tf-gpu: ## Check whether Tensorflow works on GPU using VM with Custom Container 205 | @./scripts/check-tf-on-gpu.sh 206 | 207 | check-torch-gpu: ## Check whether PyTorch works on GPU using VM with Custom Container 208 | @./scripts/check-torch-on-gpu.sh 209 | 210 | check-pipeline: ## Check whether the Beam pipeline can run on GPU using VM with Custom Container and DirectRunner 211 | @./scripts/check-pipeline.sh 212 | 213 | create-flex-template: ## Create a Flex Template file using a Flex Template custom container 214 | gcloud dataflow flex-template build $(TEMPLATE_FILE_GCS_PATH) \ 215 | --image $(CUSTOM_CONTAINER_IMAGE) \ 216 | --metadata-file ./flex/metadata.json \ 217 | --sdk-language "PYTHON" \ 218 | --staging-location $(STAGING_LOCATION) \ 219 | --temp-location $(TEMP_LOCATION) \ 220 | --project $(PROJECT_ID) \ 221 | --worker-region $(REGION) \ 222 | --worker-machine-type $(MACHINE_TYPE) 223 | 224 | run-df-gpu-flex: ## Run a Dataflow job using the Flex Template 225 | $(eval JOB_NAME := beam-ml-starter-gpu-flex-$(shell date +%s)-$(shell echo $$$$)) 226 | ifeq ($(MODEL_ENV), "TORCH") 227 | gcloud dataflow flex-template run $(JOB_NAME) \ 228 | --template-file-gcs-location $(TEMPLATE_FILE_GCS_PATH) \ 229 | --project $(PROJECT_ID) \ 230 | --region $(REGION) \ 231 | --worker-machine-type $(MACHINE_TYPE) \ 232 | --additional-experiments disable_worker_container_image_prepull \ 233 | --parameters number_of_worker_harness_threads=1 \ 234 | --parameters sdk_location=container \ 235 | --parameters sdk_container_image=$(CUSTOM_CONTAINER_IMAGE) \ 236 | --parameters dataflow_service_option=$(SERVICE_OPTIONS) \ 237 | --parameters input=$(INPUT_DATA) \ 238 | --parameters output=$(OUTPUT_DATA) \ 239 | --parameters device=GPU \ 240 | --parameters model_state_dict_path=$(MODEL_STATE_DICT_PATH) \ 241 | --parameters model_name=$(MODEL_NAME) 242 | else 243 | gcloud dataflow flex-template run $(JOB_NAME) \ 244 | --template-file-gcs-location $(TEMPLATE_FILE_GCS_PATH) \ 245 | --project $(PROJECT_ID) \ 246 | --region $(REGION) \ 247 | --worker-machine-type $(MACHINE_TYPE) \ 248 | --additional-experiments disable_worker_container_image_prepull \ 249 | --parameters number_of_worker_harness_threads=1 \ 250 | --parameters sdk_location=container \ 251 | --parameters 
sdk_container_image=$(CUSTOM_CONTAINER_IMAGE) \ 252 | --parameters dataflow_service_option=$(SERVICE_OPTIONS) \ 253 | --parameters input=$(INPUT_DATA) \ 254 | --parameters output=$(OUTPUT_DATA) \ 255 | --parameters device=GPU \ 256 | --parameters tf_model_uri=$(TF_MODEL_URI) 257 | endif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dataflow ML Starter Project 2 | 3 | ## Summary 4 | This repo contains a simple Beam RunInference project, which demonstrates how to develop and test this Beam pipeline using DirectRunner 5 | and launch the production job using DataflowRunner on either CPUs or GPUs. It can serve as a boilerplate for creating a new Dataflow ML project. 6 | 7 | **This is not an officially supported Google product**. 8 | 9 | ## Prerequisites 10 | 11 | * conda 12 | * git 13 | * make 14 | * docker 15 | * gcloud 16 | * python3-venv 17 | 18 | ```bash 19 | sudo apt-get update 20 | sudo apt-get install -y python3-venv git make time wget 21 | ``` 22 | Install Docker on Debian: https://docs.docker.com/engine/install/debian/ 23 | To run Docker without sudo, 24 | ```bash 25 | sudo groupadd docker 26 | sudo usermod -aG docker $USER 27 | newgrp docker 28 | ``` 29 | 30 | ## Directory structure 31 | ``` 32 | . 33 | ├── LICENSE 34 | ├── .env.template <- A configuration template file to define environment-specific variables 35 | ├── Makefile <- Makefile with commands; type `make` to get the command list 36 | ├── README.md <- The top-level README for developers using this project 37 | ├── data <- Any data for local development and testing 38 | │   └── openimage_10.txt <- Sample test data that contains the GCS file path for each image 39 | ├── pyproject.toml <- The TOML format Python project configuration file 40 | ├── requirements.dev.txt <- Packages for development, such as `pytest` 41 | ├── requirements.prod.txt <- Packages for the production environment; produces `requirements.txt` 42 | ├── scripts <- Utility bash scripts 43 | ├── setup.py <- Used in `python setup.py sdist` to create the multi-file python package 44 | ├── my_project <- Source code for use in this project, also your python package module name 45 | │   ├── __init__.py <- Makes my_project a Python package 46 | │   ├── config.py <- `pydantic` model classes to define sources, sinks, and models 47 | │   ├── pipeline.py <- Builds the Beam RunInference pipeline 48 | │   └── run.py <- A run module to parse the command options and run the Beam pipeline 49 | ├── tensor_rt.Dockerfile <- A Dockerfile to create a custom container with TensorRT 50 | └── tests <- Tests to cover local developments 51 | └── test_pipeline.py 52 | ``` 53 | 54 | ## User Guide 55 | 56 | **This process is only tested on GCE VMs with Debian.** 57 | 58 | ### Step 1: Clone this repo and edit .env 59 | 60 | ```bash 61 | git clone https://github.com/google/dataflow-ml-starter.git 62 | cd dataflow-ml-starter 63 | cp .env.template .env 64 | ``` 65 | Use your editor to fill in the information in the `.env` file. 66 | 67 | If you want to try other pytorch models under `gs://apache-beam-ml/models/`, 68 | ```bash 69 | gsutil ls gs://apache-beam-ml/models/ 70 | ``` 71 | you need to edit `config.py` to add more model names (see the sketch below). 72 | 73 | It is highly recommended to run through this guide once using `mobilenet_v2` for image classification.
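For illustration, here is a minimal sketch of what an allowlist-style check in `config.py` could look like. This is an assumption about the shape of that code, not the repo's actual `pydantic` classes; the `KNOWN_MODELS` set and the field names shown here are hypothetical:

```python
# A minimal sketch (assumed, not copied from my_project/config.py) of how a
# pydantic model can validate the model name supplied on the command line.
from typing import Optional

from pydantic import BaseModel, validator

# Hypothetical allowlist; extend it to try other torchvision models
# stored under gs://apache-beam-ml/models/.
KNOWN_MODELS = {"mobilenet_v2", "resnet101"}


class ModelConfig(BaseModel):
    model_state_dict_path: Optional[str] = None  # gs:// path to the .pth state dict
    model_name: Optional[str] = None  # torchvision model name, e.g. mobilenet_v2
    tf_model_uri: Optional[str] = None  # set this instead for a TF2 hub model

    @validator("model_name")
    def check_known_model(cls, v):
        if v is not None and v not in KNOWN_MODELS:
            raise ValueError(f"unknown model name: {v}; add it to KNOWN_MODELS first")
        return v
```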
74 | 75 | All the useful actions can be triggered using `make`: 76 | ```console 77 | $ make 78 | 79 | make targets: 80 | 81 | check-beam                Check whether Beam is installed on GPU using VM with Custom Container 82 | check-pipeline            Check whether the Beam pipeline can run on GPU using VM with Custom Container and DirectRunner 83 | check-tf-gpu              Check whether Tensorflow works on GPU using VM with Custom Container 84 | check-torch-gpu           Check whether PyTorch works on GPU using VM with Custom Container 85 | clean                     Remove virtual environment, downloaded models, etc 86 | clean-lite                Remove pycache files, pytest files, etc 87 | create-flex-template      Create a Flex Template file using a Flex Template custom container 88 | create-vm                 Create a VM with GPU to test the docker image 89 | delete-vm                 Delete a VM 90 | docker                    Build a custom docker image and push it to Artifact Registry 91 | format                    Run formatter on source code 92 | help                      Print this help 93 | init                      Init virtual environment 94 | init-venv                 Create virtual environment in venv folder 95 | lint                      Run linter on source code 96 | run-df-cpu                Run a Dataflow job with CPUs and without Custom Container 97 | run-df-gpu                Run a Dataflow job using the custom container with GPUs 98 | run-df-gpu-flex           Run a Dataflow job using the Flex Template 99 | run-direct                Run a local test with DirectRunner 100 | test                      Run tests 101 | test-latest-env           Replace the Beam version with the latest version (including release candidates) 102 | ``` 103 | 104 | ### Pipeline Details 105 | 106 | This project contains a simple RunInference Beam pipeline: 107 | ``` 108 | Read the GCS file that contains image GCS paths (beam.io.ReadFromText) -> 109 | Pre-process the input image, run a Pytorch or Tensorflow image classification model, post-process the results -> 110 | Write all predictions back to the GCS output file 111 | ``` 112 | The input image data is created from the ImageNet images. 113 | 114 | The overall code flow is as follows: 115 | 116 | * `.env` defines the environment variables, such as the Torch or TF model, the model name, the Dockerfile template, etc. 117 | * `Makefile` reads these environment variables from `.env` and, based on the make target, runs tests, builds docker images, or launches Dataflow jobs with CPUs or GPUs. 118 | * `run.py` is called by the `Makefile` targets to parse the input arguments and set `ModelConfig`, `SourceConfig`, and `SinkConfig` defined in `config.py`, then calls `build_pipeline` from `pipeline.py` to build the final Beam pipeline. 119 | 120 | 121 | To customize the pipeline, modify `build_pipeline` in [pipeline.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/pipeline.py). It defines how to read the image data from TextIO, pre-process the images, score them, post-process the predictions, 122 | and finally save the results using TextIO (a simplified sketch is shown below). 123 | 124 | [config.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/config.py) contains a set of `pydantic` models to specify the configurations for sources, sinks, and models and validate them. Users can easily add more Pytorch classification models. More examples can be found [here](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference). 125 |
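For orientation, here is a simplified, self-contained sketch of a pipeline with this shape. It is an illustration only, not the repo's actual `build_pipeline`: the `read_and_preprocess` and `format_prediction` helpers are hypothetical stand-ins for the real pre- and post-processing code in `my_project/pipeline.py`, which also supports TensorFlow models:

```python
# A simplified sketch of the RunInference flow described above
# (not the actual my_project/pipeline.py implementation).
import io

import apache_beam as beam
import torch
from apache_beam.io.filesystems import FileSystems
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.pytorch_inference import PytorchModelHandlerTensor
from PIL import Image
from torchvision import models, transforms


def read_and_preprocess(gcs_path: str) -> torch.Tensor:
    # Read the image bytes from GCS and convert them to a float tensor.
    with FileSystems.open(gcs_path) as f:
        image = Image.open(io.BytesIO(f.read())).convert("RGB")
    return transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])(image)


def format_prediction(result) -> str:
    # result is a PredictionResult; keep only the top-1 class index.
    return str(int(torch.argmax(result.inference)))


model_handler = PytorchModelHandlerTensor(
    state_dict_path="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth",
    model_class=models.mobilenet_v2,
    model_params={"num_classes": 1000},
)

with beam.Pipeline() as p:
    (
        p
        | "ReadImagePaths" >> beam.io.ReadFromText("data/openimage_10.txt")
        | "Preprocess" >> beam.Map(read_and_preprocess)
        | "RunInference" >> RunInference(model_handler)
        | "Postprocess" >> beam.Map(format_prediction)
        | "WritePredictions" >> beam.io.WriteToText("beam-output/beam_test_out.txt")
    )
```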
126 | ### `.env` Details 127 | 128 | Most of the options are configured through the `.env` file. 129 | Below is one example that uses the Pytorch `mobilenet_v2` model for image classification: 130 | ``` 131 | ################################################################################ 132 | ### PYTHON SDK SETTINGS 133 | ################################################################################ 134 | PYTHON_VERSION=3.10 135 | BEAM_VERSION=2.48.0 136 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile 137 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev" 138 | ################################################################################ 139 | ### GCP SETTINGS 140 | ################################################################################ 141 | PROJECT_ID=apache-beam-testing 142 | REGION=us-central1 143 | DISK_SIZE_GB=50 144 | MACHINE_TYPE=n1-standard-2 145 | VM_NAME=beam-ml-starter-gpu-1 146 | ################################################################################ 147 | ### DATAFLOW JOB SETTINGS 148 | ################################################################################ 149 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests 150 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests 151 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/xqhu/pytorch_gpu:latest 152 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver" 153 | ################################################################################ 154 | ### DATAFLOW JOB MODEL SETTINGS 155 | ################################################################################ 156 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth" 157 | MODEL_NAME=mobilenet_v2 158 | ################################################################################ 159 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS 160 | ################################################################################ 161 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt" 162 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/torch/result_gpu_xqhu.txt" 163 | ``` 164 | Most of the options are intuitive. `DOCKERFILE_TEMPLATE` provides the Dockerfile template that will be used to build the custom container. `CUSTOM_CONTAINER_IMAGE` is the Docker image storage location. 165 | By default, this Dataflow job uses GPUs (i.e., T4) with the custom container, as defined by `SERVICE_OPTIONS`. `MODEL_STATE_DICT_PATH` and `MODEL_NAME` define the Pytorch model information. For this Beam pipeline, we use GCS buckets for the input and output data. 166 | 167 | ### Custom container 168 | We provide three Dockerfile templates as examples to show how to build a custom container: 169 | |Name|Description| 170 | |---|---| 171 | |tensor_rt.Dockerfile| TensorRT + Python 3.8| 172 | |pytorch_gpu.Dockerfile| Pytorch with GPUs + Python 3.10| 173 | |tensorflow_gpu.Dockerfile | Tensorflow with GPUs + Python 3.8| 174 | 175 | Note: you should keep your local Python environment the same as the one defined in the Dockerfile. 176 | These Dockerfile examples should be customized based on your project requirements. 177 | 178 | ### Step 2: Initialize a venv for your project 179 | ```bash 180 | make init 181 | source venv/bin/activate 182 | ``` 183 | Note that you must make sure the base Python version matches the version defined in `.env`.
184 | The base Python can be configured using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html), e.g., 185 | ```bash 186 | conda create --name py38 python=3.8 187 | conda activate py38 188 | ``` 189 | If anything goes wrong, you can rebuild the `venv`: 190 | ```bash 191 | make clean 192 | make init 193 | ``` 194 | To check that the `venv` is created correctly, run 195 | ```bash 196 | make test 197 | ``` 198 | 199 | ### Step 3: Test the Beam pipeline using DirectRunner 200 | `DirectRunner` provides a local way to validate whether your Beam pipeline works correctly: 201 | ```bash 202 | make run-direct 203 | ``` 204 | 205 | ### Step 4: Run the Beam pipeline using DataflowRunner 206 | To run a Dataflow job using CPUs without a custom container, try this: 207 | ```bash 208 | make run-df-cpu 209 | ``` 210 | When using `resnet101` to score 50k images, the job took ~30m and cost ~$1.4. 211 | For `mobilenet_v2`, it cost ~$0.5 and took ~22m. 212 | Note that the cost and time depend on your job settings and regions. 213 | 214 | #### Build Custom Container with GPU support 215 | Running Dataflow GPU jobs requires building a custom container: 216 | ```bash 217 | make docker 218 | ``` 219 | The final docker image will be pushed to Artifact Registry. For this guide, 220 | we use `tensor_rt.Dockerfile` to demonstrate how to build the container to run the inference on GPUs with TensorRT. 221 | **Note: given the base image issue for TensorRT, only Python 3.8 should be used when running on GPUs.** 222 | You can follow [this doc](https://cloud.google.com/dataflow/docs/gpu/use-gpus#custom-container) to create other GPU containers. 223 | 224 | #### Test Custom Container using GCE VM 225 | It is highly recommended to test your custom container locally before running it with Dataflow: 226 | ```bash 227 | make create-vm 228 | ``` 229 | This creates a GCE VM with a T4 GPU and installs the NVIDIA driver. It will take a few minutes. 230 | This VM then lets you test whether the docker container is built correctly: 231 | ```bash 232 | # check whether Beam is installed in Custom Container 233 | make check-beam 234 | # check whether Tensorflow can use GPUs in Custom Container 235 | make check-tf-gpu 236 | # check whether PyTorch can use GPUs in Custom Container 237 | make check-torch-gpu 238 | # check whether DirectRunner can run on GPUs in Custom Container 239 | make check-pipeline 240 | ``` 241 | Note that these commands will take some time to download your container. 242 | You should see outputs similar to these: 243 | ```bash 244 | Checking Python version on VM... 245 | Python 3.8.10 246 | Checking venv exists on VM... 247 | python3-venv/now 3.8.2-0ubuntu2 amd64 [installed,local] 248 | Checking Beam Version on VM... 249 | 2.48.0 250 | Checking Tensorflow on GPU... 251 | [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] 252 | Checking PyTorch on GPU... 253 | True 254 | Tesla T4 255 | ... 256 | The DirectRunner run succeeded on GPU! 257 | ``` 258 | The last line shows whether the pipeline can run successfully on the VM's GPU in the Custom Container (the snippet below shows roughly what the GPU checks amount to). 259 |
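For reference, the TF and PyTorch checks above boil down to a few lines of Python executed inside the container. This is a sketch of the idea, not the exact contents of the `scripts/check-*.sh` scripts:

```python
# Rough equivalent of what the GPU checks verify inside the custom container.
import tensorflow as tf
import torch

# Tensorflow should list a GPU device, matching the sample output above.
print(tf.config.list_physical_devices())

# PyTorch should print True and the GPU name (e.g., "Tesla T4").
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```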
260 | After finishing the tests, you can remove this VM: 261 | ```bash 262 | make delete-vm 263 | ``` 264 | 265 | #### Run the Beam pipeline using DataflowRunner on GPUs 266 | This runs a Dataflow job with GPUs: 267 | ```bash 268 | make run-df-gpu 269 | ``` 270 | When using `resnet101` to score 50k images, the job took ~1h and cost ~$0.5. 271 | For `mobilenet_v2`, it cost ~$0.05 and took ~1h. 272 | Note that the cost and time depend on your job settings and regions. 273 | 274 | ### Run the Beam pipeline with the Pub/Sub source 275 | When `INPUT_DATA` from the `.env` file defines a valid Pub/Sub topic (e.g., `projects/apache-beam-testing/topics/Imagenet_openimage_50k_benchmark`), 276 | the Beam pipeline is created using the Pub/Sub source with `FixedWindows` and switches to `beam.io.fileio.WriteToFiles`, which supports streaming pipelines (a sketch follows below). 277 | Note that for this toy example, writing the predictions to a GCS bucket is not efficient, since each output file is quite small (a few bytes). 278 | In practice, you might tune [the autoscaling options](https://cloud.google.com/dataflow/docs/guides/troubleshoot-autoscaling) to optimize the streaming pipeline performance. 279 | Note that the streaming job will run forever until it is canceled or drained. 280 |
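As a rough illustration of that streaming branch (assumed, not copied from `pipeline.py`; the topic name is a placeholder and the 10-second window size is an arbitrary choice):

```python
# A minimal sketch of a streaming read with fixed windows and windowed file writes.
import apache_beam as beam
from apache_beam.io import fileio
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.transforms import window

options = PipelineOptions(streaming=True)
with beam.Pipeline(options=options) as p:
    (
        p
        | "ReadFromPubSub" >> beam.io.ReadFromPubSub(topic="projects/your-project/topics/your-topic")
        | "Decode" >> beam.Map(lambda msg: msg.decode("utf-8"))
        | "Window" >> beam.WindowInto(window.FixedWindows(10))
        # ... the same preprocessing + RunInference steps as in the batch pipeline ...
        | "Write" >> fileio.WriteToFiles(path="gs://your-bucket/streaming-output/")
    )
```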
294 | 
295 | 
296 | ## FAQ
297 | 
298 | ### Permission error when using any GCP command
299 | ```bash
300 | gcloud auth login
301 | gcloud auth application-default login
302 | # replace it with the appropriate region
303 | gcloud auth configure-docker us-docker.pkg.dev
304 | # or if you use docker-credential-gcr
305 | docker-credential-gcr configure-docker --registries=us-docker.pkg.dev
306 | ```
307 | Make sure you specify the appropriate region for Artifact Registry.
308 | 
309 | ### AttributeError: Can't get attribute 'default_tensor_inference_fn'
310 | ```
311 | AttributeError: Can't get attribute 'default_tensor_inference_fn' on
312 | ```
313 | This error indicates that your Dataflow job is running an outdated Beam SDK. If you use `--sdk_location container`, it means your Docker container contains an outdated Beam SDK.
314 | 
315 | ### QUOTA_EXCEEDED
316 | ```
317 | Startup of the worker pool in zone us-central1-a failed to bring up any of the desired 1 workers. Please refer to https://cloud.google.com/dataflow/docs/guides/common-errors#worker-pool-failure for help troubleshooting. QUOTA_EXCEEDED: Instance 'benchmark-tests-pytorch-i-05041052-ufe3-harness-ww4n' creation failed: Quota 'NVIDIA_T4_GPUS' exceeded. Limit: 32.0 in region us-central1.
318 | ```
319 | Check https://cloud.google.com/compute/docs/regions-zones, select another zone that offers your desired machine type, and relaunch the Dataflow job.
320 | 
321 | ### ERROR: failed to solve: failed to fetch anonymous token: unexpected status: 401 Unauthorized
322 | ```
323 | failed to solve with frontend dockerfile.v0: failed to create LLB definition: failed to authorize: rpc error: code = Unknown desc = failed to fetch anonymous token: unexpected status: 401 Unauthorized
324 | ```
325 | Restarting Docker can resolve this issue.
326 | 
327 | ### Check the built container
328 | ```bash
329 | docker run --rm -it --entrypoint=/bin/bash $CUSTOM_CONTAINER_IMAGE
330 | ```
331 | 
332 | ### Errors that can occur when the custom container is not built correctly
333 | 
334 | Check Cloud Logging and pay attention to the INFO-level worker logs:
335 | ```
336 | INFO 2023-05-06T15:13:01.237562007Z The virtual environment was not created successfully because ensurepip is not
337 | INFO 2023-05-06T15:13:01.237601258Z available. On Debian/Ubuntu systems, you need to install the python3-venv
338 | INFO 2023-05-06T15:13:01.237607714Z package using the following command.
339 | ```
340 | or (might be caused by building the container on macOS)
341 | ```
342 | exec /opt/apache/beam/boot: no such file or directory
343 | ```
344 | 
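If you hit the `exec /opt/apache/beam/boot` error after building the image on an Apple Silicon Mac, the container architecture likely does not match Dataflow's x86-64 workers. One assumed fix is to build explicitly for `linux/amd64`; note that the repo's Dockerfiles are templates (see the `${BEAM_VERSION}` placeholders), so apply the flag to however `make docker` invokes the build, e.g.,
```bash
# A sketch, assuming a rendered Dockerfile; `rendered.Dockerfile` is a
# hypothetical name for the template after substituting BEAM_VERSION etc.
docker buildx build --platform linux/amd64 \
  -f rendered.Dockerfile \
  -t "$CUSTOM_CONTAINER_IMAGE" \
  --push .
```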
345 | ## Useful Links
346 | * https://cloud.google.com/dataflow/docs/guides/using-custom-containers#docker
347 | * https://cloud.google.com/dataflow/docs/gpu/use-gpus#custom-container
348 | * https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/
349 | * https://github.com/apache/beam/blob/master/.test-infra/jenkins/job_InferenceBenchmarkTests_Python.groovy
350 | * https://cloud.google.com/dataflow/docs/gpu/troubleshoot-gpus#debug-vm
351 | * https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/dataflow/flex-templates/streaming_beam
352 | * https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates
353 | * https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates#use_custom_container_images
--------------------------------------------------------------------------------
/data/openimage_10.txt:
--------------------------------------------------------------------------------
1 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec63d33df5e91fd.jpg
2 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec64bfcb2d515c9.jpg
3 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec67f239007cb18.jpg
4 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6988f60e8e881.jpg
5 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6bf3c1551224a.jpg
6 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6c1055bae51f5.jpg
7 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6c15d60358c85.jpg
8 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6ca007effdd80.jpg
9 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec7096df9477315.jpg
10 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec70b2abe194c75.jpg
11 | 
--------------------------------------------------------------------------------
/flex/metadata.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "Beam RunInference Python flex template",
3 |   "description": "Beam RunInference example for a Python Flex Template.",
4 |   "parameters": [
5 |     {
6 |       "name": "input",
7 |       "label": "Input data",
8 |       "helpText": "Input image URI data that could be a GCS bucket or Pub/Sub topic"
9 |     },
10 |     {
11 |       "name": "output",
12 |       "label": "Output GCS bucket path",
13 |       "helpText": "A GCS bucket that stores the model predictions"
14 |     },
15 |     {
16 |       "name": "tf_model_uri",
17 |       "label": "TensorFlow model URI",
18 |       "helpText": "A valid TensorFlow model URI",
19 |       "isOptional": true
20 |     },
21 |     {
22 |       "name": "model_name",
23 |       "label": "a PyTorch model name",
24 |       "helpText": "A model name, e.g. resnet101",
25 |       "isOptional": true
26 |     },
27 |     {
28 |       "name": "model_state_dict_path",
29 |       "label": "a PyTorch model state path",
30 |       "helpText": "Path to the model's state_dict",
31 |       "isOptional": true
32 |     },
33 |     {
34 |       "name": "device",
35 |       "label": "device to run models",
36 |       "helpText": "device could be either CPU or GPU",
37 |       "isOptional": true
38 |     },
39 |     {
40 |       "name": "disk_size_gb",
41 |       "label": "disk_size_gb",
42 |       "helpText": "disk_size_gb for worker",
43 |       "isOptional": true
44 |     },
45 |     {
46 |       "name": "dataflow_service_option",
47 |       "label": "dataflow_service_option",
48 |       "helpText": "dataflow_service_option for worker",
49 |       "isOptional": true
50 |     }
51 |   ]
52 | }
--------------------------------------------------------------------------------
/my_project/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | 
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | 
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
--------------------------------------------------------------------------------
/my_project/config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | 
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | 
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # standard libraries
16 | import re
17 | from enum import Enum
18 | 
19 | # third party libraries
20 | from pydantic import BaseModel, Field, root_validator, validator
21 | 
22 | 
23 | class ModelName(str, Enum):
24 |     RESNET101 = "resnet101"
25 |     MOBILENET_V2 = "mobilenet_v2"
26 | 
27 | 
28 | class ModelConfig(BaseModel):
29 |     model_state_dict_path: str = Field(None, description="path to the torch model's state dictionary (state_dict)")
30 |     model_class_name: ModelName = Field(None, description="Reference to the class definition of the model.")
31 |     model_params: dict = Field(
32 |         None,
33 |         description="Parameters passed to the constructor of the model_class. "
34 |         "These will be used to instantiate the model object in the RunInference API.",
35 |     )
36 |     tf_model_uri: str = Field(None, description="TF model URI from https://tfhub.dev/")
37 |     device: str = Field("CPU", description="Device to be used on the Runner. Choices are (CPU, GPU)")
38 |     min_batch_size: int = 10
39 |     max_batch_size: int = 100
40 | 
41 |     @root_validator
42 |     def validate_fields(cls, values):
43 |         v = values.get("model_state_dict_path")
44 |         if v and values.get("tf_model_uri"):
45 |             raise ValueError("Cannot specify both model_state_dict_path and tf_model_uri")
46 |         if v is None and values.get("tf_model_uri") is None:
47 |             raise ValueError("At least one of model_state_dict_path or tf_model_uri must be specified")
48 |         if v and values.get("model_class_name") is None:
49 |             raise ValueError("model_class_name must be specified when using model_state_dict_path")
50 |         if v and values.get("model_params") is None:
51 |             raise ValueError("model_params must be specified when using model_state_dict_path")
52 |         return values
53 | 
54 | 
55 | def _validate_topic_path(topic_path):
56 |     pattern = r"projects/.+/topics/.+"
57 |     return bool(re.match(pattern, topic_path))
58 | 
59 | 
60 | class SourceConfig(BaseModel):
61 |     input: str = Field(..., description="the input path to a text file or a Pub/Sub topic")
62 |     images_dir: str = Field(
63 |         None,
64 |         description="Path to the directory where images are stored. "
65 |         "Not required if image names in the input file have absolute paths.",
66 |     )
67 |     streaming: bool = False
68 | 
69 |     @validator("streaming", pre=True, always=True)
70 |     def set_streaming(cls, v, values):
71 |         return _validate_topic_path(values["input"])
72 | 
73 | 
74 | class SinkConfig(BaseModel):
75 |     output: str = Field(..., description="the output path to save results as a text file")
76 | 
--------------------------------------------------------------------------------
/my_project/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | 
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | 
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | """A pipeline that uses RunInference API to perform image classification.""" 16 | 17 | # standard libraries 18 | import io 19 | import os 20 | from typing import Iterable, Iterator, Optional, Tuple, Union 21 | 22 | # third party libraries 23 | import apache_beam as beam 24 | import numpy as np 25 | import torch 26 | import torch.nn as nn 27 | from apache_beam.io.filesystems import FileSystems 28 | from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference 29 | from apache_beam.ml.inference.pytorch_inference import PytorchModelHandlerTensor 30 | from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerTensor 31 | from PIL import Image 32 | from torchvision import models, transforms 33 | 34 | # Dataflow ML libraries 35 | from my_project.config import ModelConfig, ModelName, SinkConfig, SourceConfig 36 | 37 | import tensorflow as tf # isort:skip 38 | 39 | 40 | def get_model_class(model_name: ModelName) -> nn.Module: 41 | model_dict = {ModelName.RESNET101: models.resnet101, ModelName.MOBILENET_V2: models.mobilenet_v2} 42 | 43 | model_class = model_dict.get(model_name) 44 | if not model_class: 45 | raise ValueError(f"cannot recognize the model {model_name}") 46 | return model_class 47 | 48 | 49 | def read_image(image_file_name: Union[str, bytes], path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]: 50 | if isinstance(image_file_name, bytes): 51 | image_file_name = image_file_name.decode() 52 | if path_to_dir is not None: 53 | image_file_name = os.path.join(path_to_dir, image_file_name) 54 | with FileSystems().open(image_file_name, "r") as file: 55 | data = Image.open(io.BytesIO(file.read())).convert("RGB") 56 | return image_file_name, data 57 | 58 | 59 | def preprocess_image(data: Image.Image) -> torch.Tensor: 60 | image_size = (224, 224) 61 | # Pre-trained PyTorch models expect input images normalized with the 62 | # below values (see: https://pytorch.org/vision/stable/models.html) 63 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 64 | transform = transforms.Compose( 65 | [ 66 | transforms.Resize(image_size), 67 | transforms.ToTensor(), 68 | normalize, 69 | ] 70 | ) 71 | return transform(data) 72 | 73 | 74 | def preprocess_image_for_tf(data: Image.Image) -> tf.Tensor: 75 | # Convert the input image to the type and dimensions required by the model. 76 | 77 | img = data.resize((224, 224)) 78 | img = np.array(img) / 255.0 79 | 80 | return tf.cast(tf.convert_to_tensor(img[...]), dtype=tf.float32) 81 | 82 | 83 | def filter_empty_lines(text: str) -> Iterator[str]: 84 | if len(text.strip()) > 0: 85 | yield text 86 | 87 | 88 | class PostProcessor(beam.DoFn): 89 | def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]: 90 | filename, prediction_result = element 91 | if isinstance(prediction_result.inference, torch.Tensor): 92 | prediction = torch.argmax(prediction_result.inference, dim=0) 93 | else: 94 | prediction = np.argmax(prediction_result.inference) 95 | yield filename + "," + str(prediction.item()) 96 | 97 | 98 | def build_pipeline(pipeline, source_config: SourceConfig, sink_config: SinkConfig, model_config: ModelConfig) -> None: 99 | """ 100 | Args: 101 | pipeline: a given input pipeline 102 | source_config: a source config 103 | sink_config: a sink config 104 | model_config: a model config to instantiate PytorchModelHandlerTensor 105 | """ 106 | 107 | # In this example we pass keyed inputs to RunInference transform. 
108 | # Therefore, we use KeyedModelHandler wrapper over PytorchModelHandler or TFModelHandlerTensor. 109 | if model_config.model_state_dict_path: 110 | model_handler = KeyedModelHandler( 111 | PytorchModelHandlerTensor( 112 | state_dict_path=model_config.model_state_dict_path, 113 | model_class=get_model_class(model_config.model_class_name), 114 | model_params=model_config.model_params, 115 | device=model_config.device, 116 | min_batch_size=model_config.min_batch_size, 117 | max_batch_size=model_config.max_batch_size, 118 | ) 119 | ) 120 | elif model_config.tf_model_uri: 121 | model_handler = KeyedModelHandler( 122 | TFModelHandlerTensor( 123 | model_uri=model_config.tf_model_uri, 124 | device=model_config.device, 125 | min_batch_size=model_config.min_batch_size, 126 | max_batch_size=model_config.max_batch_size, 127 | ) 128 | ) 129 | else: 130 | raise ValueError("Only support PytorchModelHandler and TFModelHandlerTensor!") 131 | 132 | if source_config.streaming: 133 | # read the text file path from Pub/Sub and use FixedWindows to group these images 134 | # and then run the model inference and store the results into GCS 135 | filename_value_pair = ( 136 | pipeline 137 | | "ReadImageNamesFromPubSub" >> beam.io.ReadFromPubSub(topic=source_config.input) 138 | | "Window into fixed intervals" >> beam.WindowInto(beam.window.FixedWindows(60 * 5)) 139 | | "ReadImageData" >> beam.Map(lambda image_name: read_image(image_file_name=image_name)) 140 | ) 141 | else: 142 | # read the text file and create the pair of input data with the file name and its image 143 | filename_value_pair = ( 144 | pipeline 145 | | "ReadImageNames" >> beam.io.ReadFromText(source_config.input) 146 | | "FilterEmptyLines" >> beam.ParDo(filter_empty_lines) 147 | | "ReadImageData" 148 | >> beam.Map(lambda image_name: read_image(image_file_name=image_name, path_to_dir=source_config.images_dir)) 149 | ) 150 | 151 | if model_config.model_state_dict_path: 152 | filename_value_pair = filename_value_pair | "PreprocessImages" >> beam.MapTuple( 153 | lambda file_name, data: (file_name, preprocess_image(data)) 154 | ) 155 | else: 156 | filename_value_pair = filename_value_pair | "PreprocessImages_TF" >> beam.MapTuple( 157 | lambda file_name, data: (file_name, preprocess_image_for_tf(data)) 158 | ) 159 | 160 | # do the model inference and postprocessing 161 | predictions = ( 162 | filename_value_pair 163 | | "RunInference" >> RunInference(model_handler) 164 | | "ProcessOutput" >> beam.ParDo(PostProcessor()) 165 | ) 166 | 167 | # combine all the window results into one text for GCS 168 | if source_config.streaming: 169 | ( 170 | predictions 171 | | "WriteOutputToGCS" 172 | >> beam.io.fileio.WriteToFiles(sink_config.output, shards=0) # pylint: disable=expression-not-assigned 173 | ) 174 | else: 175 | # save the predictions to a text file 176 | predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned 177 | sink_config.output, shard_name_template="", append_trailing_newlines=True 178 | ) 179 | -------------------------------------------------------------------------------- /my_project/run.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at
6 | 
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """A run module that runs a Beam pipeline to perform image classification."""
16 | 
17 | # standard libraries
18 | import argparse
19 | import logging
20 | 
21 | # third party libraries
22 | import apache_beam as beam
23 | from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
24 | from apache_beam.runners.runner import PipelineResult
25 | 
26 | # Dataflow ML libraries
27 | from my_project.config import ModelConfig, SinkConfig, SourceConfig
28 | from my_project.pipeline import build_pipeline
29 | 
30 | 
31 | def parse_known_args(argv):
32 |     """Parses args for the workflow."""
33 |     parser = argparse.ArgumentParser()
34 |     parser.add_argument("--input", dest="input", required=True, help="Path to the text file containing image names.")
35 |     parser.add_argument(
36 |         "--output", dest="output", required=True, help="Path where to save the output predictions text file."
37 |     )
38 |     parser.add_argument(
39 |         "--model_state_dict_path", dest="model_state_dict_path", required=False, help="Path to the model's state_dict."
40 |     )
41 |     parser.add_argument("--model_name", dest="model_name", required=False, help="model name, e.g. resnet101")
42 |     parser.add_argument(
43 |         "--tf_model_uri", dest="tf_model_uri", required=False, help="tfhub model URI from https://tfhub.dev/"
44 |     )
45 |     parser.add_argument(
46 |         "--images_dir",
47 |         default=None,
48 |         help="Path to the directory where images are stored. "
49 |         "Not required if image names in the input file have absolute paths.",
50 |     )
51 |     parser.add_argument(
52 |         "--device",
53 |         default="CPU",
54 |         help="Device to be used on the Runner. Choices are (CPU, GPU).",
55 |     )
56 |     return parser.parse_known_args(argv)
57 | 
58 | 
59 | def run(argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult:
60 |     """
61 |     Args:
62 |       argv: Command line arguments defined for this example.
63 |       save_main_session: Used for internal testing.
64 |       test_pipeline: Used for internal testing.
65 | """ 66 | known_args, pipeline_args = parse_known_args(argv) 67 | 68 | # setup configs 69 | model_config = ModelConfig( 70 | model_state_dict_path=known_args.model_state_dict_path, 71 | model_class_name=known_args.model_name, 72 | model_params={"num_classes": 1000}, 73 | tf_model_uri=known_args.tf_model_uri, 74 | device=known_args.device, 75 | ) 76 | 77 | source_config = SourceConfig(input=known_args.input) 78 | sink_config = SinkConfig(output=known_args.output) 79 | 80 | # setup pipeline 81 | pipeline_options = PipelineOptions(pipeline_args, streaming=source_config.streaming) 82 | pipeline_options.view_as(SetupOptions).save_main_session = save_main_session 83 | 84 | pipeline = test_pipeline 85 | if not test_pipeline: 86 | pipeline = beam.Pipeline(options=pipeline_options) 87 | 88 | # build the pipeline using configs 89 | build_pipeline(pipeline, source_config=source_config, sink_config=sink_config, model_config=model_config) 90 | 91 | # run it 92 | result = pipeline.run() 93 | result.wait_until_finish() 94 | return result 95 | 96 | 97 | if __name__ == "__main__": 98 | logging.getLogger().setLevel(logging.INFO) 99 | run() 100 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 120 17 | include = '\.pyi?$' 18 | exclude = ''' 19 | 20 | ( 21 | /( 22 | \.eggs # exclude a few common directories in the 23 | | \.git # root of the project 24 | | \.hg 25 | | \.mypy_cache 26 | | \.tox 27 | | \.vscode 28 | | \.idea 29 | | \.ipynb_checkpoints 30 | | \.dvc 31 | | _build 32 | | buck-out 33 | | build 34 | | dist 35 | | venv 36 | | node_modules 37 | )/ 38 | | version.py # also separately exclude a file named foo.py in 39 | # the root of the project 40 | ) 41 | ''' -------------------------------------------------------------------------------- /pytorch_gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # This uses Ubuntu with Python 3.10 16 | ARG PYTORCH_SERVING_BUILD_IMAGE=pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime 17 | 18 | FROM ${PYTORCH_SERVING_BUILD_IMAGE} 19 | 20 | WORKDIR /workspace 21 | 22 | COPY requirements.txt requirements.txt 23 | 24 | RUN pip install --upgrade pip \ 25 | && pip install --no-cache-dir -r requirements.txt \ 26 | && rm -f requirements.txt 27 | 28 | # Copy files from official SDK image, including script/dependencies. 29 | COPY --from=apache/beam_python3.10_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam 30 | 31 | # Set the entrypoint to Apache Beam SDK launcher. 32 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /requirements.dev.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | #building 17 | setuptools>=67.7.2 18 | 19 | # format/lint 20 | flake8>=5.0.4 21 | isort>=5.6.4 22 | pre-commit>=2.9.3 23 | black>=22.3.0 24 | 25 | # test 26 | pytest>=6.2.1 27 | pytest-cov>=2.10.1 28 | pytest-ordering 29 | pytest-env 30 | -------------------------------------------------------------------------------- /requirements.prod.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apache-beam[gcp]==${BEAM_VERSION} 16 | pydantic<2.0.0 17 | torch>=1.7.1 18 | torchvision>=0.8.2 19 | pillow>=8.0.0 20 | tensorflow 21 | tensorflow_hub 22 | numpy<2.0.0 23 | pyOpenSSL 24 | -------------------------------------------------------------------------------- /scripts/check-beam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2023 Google LLC 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # Import environment variables from .env file. 18 | source .env 19 | 20 | # Check if the project ID and zone environment variables are set. 21 | if [ -z "${PROJECT_ID}" ]; then 22 | echo "The PROJECT_ID environment variable is not set." 23 | exit 1 24 | fi 25 | 26 | if [ -z "${ZONE}" ]; then 27 | echo "The ZONE environment variable is not set." 28 | exit 1 29 | fi 30 | 31 | if [ -z "${VM_NAME}" ]; then 32 | echo "The VM_NAME environment variable is not set." 33 | exit 1 34 | fi 35 | 36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then 37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set." 38 | exit 1 39 | fi 40 | 41 | echo "Checking Python version on VM..." 42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \ 43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ 44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \ 46 | \"python --version\"" 47 | 48 | echo "Checking venv exists on VM..." 49 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \ 50 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ 51 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 52 | --privileged $CUSTOM_CONTAINER_IMAGE -c \ 53 | 'apt list --installed | grep python3-venv'" 54 | 55 | echo "Checking Beam Version on VM..." 56 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \ 57 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ 58 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 59 | --privileged $CUSTOM_CONTAINER_IMAGE -c \ 60 | \"python -c 'import apache_beam as beam; print(beam.__version__)'\"" -------------------------------------------------------------------------------- /scripts/check-pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2023 Google LLC 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Import environment variables from .env file. 18 | source .env 19 | 20 | # Check if the project ID and zone environment variables are set. 21 | if [ -z "${PROJECT_ID}" ]; then 22 | echo "The PROJECT_ID environment variable is not set." 23 | exit 1 24 | fi 25 | 26 | if [ -z "${ZONE}" ]; then 27 | echo "The ZONE environment variable is not set." 28 | exit 1 29 | fi 30 | 31 | if [ -z "${VM_NAME}" ]; then 32 | echo "The VM_NAME environment variable is not set." 33 | exit 1 34 | fi 35 | 36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then 37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set." 
38 | exit 1 39 | fi 40 | 41 | vm_ssh="gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command" 42 | vm_scp="gcloud compute scp --strict-host-key-checking=no --project $PROJECT_ID --zone=$ZONE --quiet" 43 | 44 | # Package the local code and copy it to VM 45 | PACKAGE_NAME="my_project-0.0.1" 46 | python3 setup.py sdist 47 | $vm_ssh "sudo rm -fr ~/*" 48 | $vm_scp dist/$PACKAGE_NAME.tar.gz data/openimage_10.txt $VM_NAME:~/ 49 | $vm_ssh "tar zxvf $PACKAGE_NAME.tar.gz; mv openimage_10.txt $PACKAGE_NAME" 50 | 51 | # Test the model on GPUs 52 | if [ -z "${TF_MODEL_URI}" ]; then 53 | echo "Running the PyTorch model on GPU..." 54 | $vm_ssh "docker run --entrypoint /bin/bash \ 55 | --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 56 | --volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \ 57 | \"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --model_state_dict_path $MODEL_STATE_DICT_PATH --model_name $MODEL_NAME --device GPU\"" 58 | else 59 | echo "Running the Tensorflow model on GPU..." 60 | $vm_ssh "docker run --entrypoint /bin/bash \ 61 | --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 62 | --volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \ 63 | \"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --tf_model_uri $TF_MODEL_URI --device GPU\"" 64 | fi 65 | 66 | $vm_ssh "[ -f './$PACKAGE_NAME/beam-output/beam_test_out.txt' ] && echo 'The DirectRunner run succeeded on GPU!' || echo 'The DirectRunner run failed on GPU!'" -------------------------------------------------------------------------------- /scripts/check-tf-on-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2023 Google LLC 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Import environment variables from .env file. 18 | source .env 19 | 20 | # Check if the project ID and zone environment variables are set. 21 | if [ -z "${PROJECT_ID}" ]; then 22 | echo "The PROJECT_ID environment variable is not set." 23 | exit 1 24 | fi 25 | 26 | if [ -z "${ZONE}" ]; then 27 | echo "The ZONE environment variable is not set." 28 | exit 1 29 | fi 30 | 31 | if [ -z "${VM_NAME}" ]; then 32 | echo "The VM_NAME environment variable is not set." 33 | exit 1 34 | fi 35 | 36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then 37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set." 38 | exit 1 39 | fi 40 | 41 | echo "Checking Tensorflow on GPU..." 
42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \ 43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ 44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \ 46 | \"python -c 'import tensorflow as tf; print(tf.config.list_physical_devices())'\"" -------------------------------------------------------------------------------- /scripts/check-torch-on-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2023 Google LLC 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Import environment variables from .env file. 18 | source .env 19 | 20 | # Check if the project ID and zone environment variables are set. 21 | if [ -z "${PROJECT_ID}" ]; then 22 | echo "The PROJECT_ID environment variable is not set." 23 | exit 1 24 | fi 25 | 26 | if [ -z "${ZONE}" ]; then 27 | echo "The ZONE environment variable is not set." 28 | exit 1 29 | fi 30 | 31 | if [ -z "${VM_NAME}" ]; then 32 | echo "The VM_NAME environment variable is not set." 33 | exit 1 34 | fi 35 | 36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then 37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set." 38 | exit 1 39 | fi 40 | 41 | echo "Checking PyTorch on GPU..." 42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \ 43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ 44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ 45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \ 46 | \"python -c 'import torch; print(torch.cuda.is_available()); print(torch.cuda.get_device_name())'\"" -------------------------------------------------------------------------------- /scripts/create-gpu-vm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2023 Google LLC 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Import environment variables from .env file. 18 | source .env 19 | 20 | # Check if the project ID and zone environment variables are set. 21 | if [ -z "${PROJECT_ID}" ]; then 22 | echo "The PROJECT_ID environment variable is not set." 23 | exit 1 24 | fi 25 | 26 | if [ -z "${ZONE}" ]; then 27 | echo "The ZONE environment variable is not set." 
28 |     exit 1
29 | fi
30 | 
31 | if [ -z "${VM_NAME}" ]; then
32 |     echo "The VM_NAME environment variable is not set."
33 |     exit 1
34 | fi
35 | 
36 | if [ -z "${MACHINE_TYPE}" ]; then
37 |     echo "The MACHINE_TYPE environment variable is not set."
38 |     exit 1
39 | fi
40 | 
41 | # Set the number of GPUs to attach to the VM.
42 | GPU_COUNT=1
43 | GPU_TYPE="nvidia-tesla-t4"
44 | 
45 | # Create the VM.
46 | echo "Waiting for VM to be created (this will take a few minutes)..."
47 | 
48 | gcloud compute instances create $VM_NAME \
49 |     --project $PROJECT_ID \
50 |     --zone $ZONE \
51 |     --machine-type $MACHINE_TYPE \
52 |     --accelerator count=$GPU_COUNT,type=$GPU_TYPE \
53 |     --image-family cos-stable \
54 |     --image-project=cos-cloud \
55 |     --maintenance-policy TERMINATE \
56 |     --restart-on-failure \
57 |     --boot-disk-size=200G \
58 |     --scopes=cloud-platform
59 | 
60 | # Wait for the VM to be created.
61 | STATUS=""
62 | while [ "$STATUS" != "RUNNING" ]; do
63 |     sleep 5
64 |     STATUS=$(gcloud compute instances describe $VM_NAME --project $PROJECT_ID --zone=$ZONE --format="value(status)")
65 | done
66 | 
67 | echo "VM $VM_NAME is now running."
68 | 
69 | # Print the VM's IP address.
70 | echo "VM IP address: $(gcloud compute instances describe $VM_NAME --project $PROJECT_ID --zone=$ZONE --format='value(networkInterfaces[0].accessConfigs[0].natIP)')"
71 | 
72 | # Install GPU driver
73 | echo "Installing Nvidia GPU driver..."
74 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --tunnel-through-iap --quiet \
75 |     --command "cos-extensions install gpu && sudo mount --bind /var/lib/nvidia /var/lib/nvidia && sudo mount -o remount,exec /var/lib/nvidia"
76 | 
77 | vm_ssh="gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command"
78 | 
79 | echo "Getting the GPU driver information..."
80 | $vm_ssh "/var/lib/nvidia/bin/nvidia-smi"
81 | 
82 | # docker-credential-gcr
83 | if [[ -n "$DOCKER_CREDENTIAL_REGISTRIES" ]]; then
84 |     echo "DOCKER_CREDENTIAL_REGISTRIES is defined."
85 |     echo "Authenticating $DOCKER_CREDENTIAL_REGISTRIES..."
86 |     $vm_ssh "docker-credential-gcr configure-docker --registries=$DOCKER_CREDENTIAL_REGISTRIES"
87 | fi
--------------------------------------------------------------------------------
/scripts/get_beam_version.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | 
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | 
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | # third party libraries 16 | import requests 17 | from packaging.version import Version 18 | 19 | 20 | def beam_versions(package_name, limit_releases=10): 21 | url = f"https://pypi.org/pypi/{package_name}/json" 22 | data = requests.get(url).json() 23 | versions = list(data["releases"].keys()) 24 | versions.sort(key=Version, reverse=True) 25 | return versions[:limit_releases] 26 | 27 | 28 | print("\n".join(beam_versions("apache-beam", 1))) 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # standard libraries 16 | import os 17 | 18 | # third party libraries 19 | import setuptools 20 | 21 | required = [] 22 | if os.path.exists("requirements.txt"): 23 | with open("requirements.txt") as f: 24 | required = f.read().splitlines() 25 | 26 | setuptools.setup( 27 | name="my_project", 28 | version="0.0.1", 29 | install_requires=required, 30 | packages=["my_project"], 31 | ) 32 | -------------------------------------------------------------------------------- /tensor_rt.Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This needs Python 3.8 for your local runtime environment 16 | ARG PYTORCH_SERVING_BUILD_IMAGE=nvcr.io/nvidia/pytorch:22.11-py3 17 | 18 | FROM ${PYTORCH_SERVING_BUILD_IMAGE} 19 | 20 | ENV PATH="/usr/src/tensorrt/bin:${PATH}" 21 | 22 | WORKDIR /workspace 23 | 24 | COPY requirements.txt requirements.txt 25 | 26 | ENV DEBIAN_FRONTEND=noninteractive 27 | 28 | RUN apt-get update \ 29 | && apt install python3.8 python3.8-venv python3-venv -y \ 30 | && pip install --upgrade pip \ 31 | && apt-get install ffmpeg libsm6 libxext6 -y --no-install-recommends \ 32 | && pip install cuda-python onnx numpy onnxruntime common \ 33 | && pip install git+https://github.com/facebookresearch/detectron2.git@5aeb252b194b93dc2879b4ac34bc51a31b5aee13 \ 34 | && pip install git+https://github.com/NVIDIA/TensorRT#subdirectory=tools/onnx-graphsurgeon 35 | 36 | RUN pip install --no-cache-dir -r requirements.txt && rm -f requirements.txt 37 | 38 | # Copy files from official SDK image, including script/dependencies. 
39 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam 40 | 41 | # Set the entrypoint to Apache Beam SDK launcher. 42 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /tensorflow_gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This needs Python 3.8 for your local runtime environment 16 | 17 | # Select an NVIDIA base image with desired GPU stack from https://ngc.nvidia.com/catalog/containers/nvidia:cuda 18 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 19 | 20 | WORKDIR /workspace 21 | 22 | COPY requirements.txt requirements.txt 23 | 24 | RUN \ 25 | # Add Deadsnakes repository that has a variety of Python packages for Ubuntu. 26 | # See: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa 27 | apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 \ 28 | && echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \ 29 | && echo "deb-src http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \ 30 | && apt-get update \ 31 | && apt-get install -y curl \ 32 | python3.8 \ 33 | python3.8-venv \ 34 | python3-venv \ 35 | # With python3.8 package, distutils need to be installed separately. 36 | python3-distutils \ 37 | && rm -rf /var/lib/apt/lists/* \ 38 | && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ 39 | && curl https://bootstrap.pypa.io/pip/3.8/get-pip.py | python \ 40 | && pip install --upgrade pip \ 41 | && pip install --no-cache-dir -r requirements.txt \ 42 | && pip install --no-cache-dir tensorflow==2.12.1 \ 43 | && pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118 44 | 45 | # Copy files from official SDK image, including script/dependencies. 46 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam 47 | 48 | # Set the entrypoint to Apache Beam SDK launcher. 49 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /tensorflow_gpu.flex.Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This needs Python 3.8 for your local runtime environment 16 | 17 | FROM gcr.io/dataflow-templates-base/flex-template-launcher-image:latest as template_launcher 18 | 19 | # Select an NVIDIA base image with desired GPU stack from https://ngc.nvidia.com/catalog/containers/nvidia:cuda 20 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 21 | 22 | WORKDIR /workspace 23 | 24 | COPY requirements.txt requirements.txt 25 | 26 | RUN \ 27 | # Add Deadsnakes repository that has a variety of Python packages for Ubuntu. 28 | # See: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa 29 | apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 \ 30 | && echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \ 31 | && echo "deb-src http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \ 32 | && apt-get update \ 33 | && apt-get install -y curl \ 34 | python3.8 \ 35 | python3.8-venv \ 36 | python3-venv \ 37 | # With python3.8 package, distutils need to be installed separately. 38 | python3-distutils \ 39 | && rm -rf /var/lib/apt/lists/* \ 40 | && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ 41 | && curl https://bootstrap.pypa.io/pip/3.8/get-pip.py | python \ 42 | && pip install --upgrade pip \ 43 | && pip install --no-cache-dir -r requirements.txt \ 44 | && pip install --no-cache-dir tensorflow==2.12.1 \ 45 | && pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118 46 | 47 | # Copy the run module 48 | COPY my_project/ /workspace/my_project 49 | RUN rm -fr /workspace/my_project/__pycache__ 50 | 51 | #Specifies which Python file to run to launch the Flex Template. 52 | ENV FLEX_TEMPLATE_PYTHON_PY_FILE="my_project/run.py" 53 | 54 | # Since we already downloaded all the dependencies, there's no need to rebuild everything. 55 | ENV PIP_NO_DEPS=True 56 | 57 | ENV PYTHONPATH "${PYTHONPATH}:/workspace/my_project/" 58 | 59 | # Copy the Dataflow Template launcher 60 | COPY --from=template_launcher /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher 61 | 62 | # Copy files from official SDK image, including script/dependencies. 63 | # Note Python 3.8 is used since the above setup uses Python 3.8. 64 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam 65 | 66 | # Set the entrypoint to the Dataflow Template launcher 67 | # Use this if the launcher image is different with the custom container image 68 | # ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"] 69 | 70 | # Set the entrypoint to Apache Beam SDK launcher. 
71 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /tests/sample.env.pytorch: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ### PYTHON SDK SETTINGS 3 | ################################################################################ 4 | PYTHON_VERSION=3.10 5 | BEAM_VERSION=2.60.0 6 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile 7 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev" 8 | ################################################################################ 9 | ### GCP SETTINGS 10 | ################################################################################ 11 | PROJECT_ID=apache-beam-testing 12 | REGION=us-central1 13 | ZONE=us-central1-f 14 | DISK_SIZE_GB=50 15 | MACHINE_TYPE=n1-standard-2 16 | VM_NAME=beam-ml-starter-gpu 17 | ################################################################################ 18 | ### DATAFLOW JOB SETTINGS 19 | ################################################################################ 20 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests 21 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests 22 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/pytorch_gpu:test 23 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver" 24 | ################################################################################ 25 | ### DATAFLOW JOB MODEL SETTINGS 26 | ################################################################################ 27 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth" 28 | MODEL_NAME=mobilenet_v2 29 | ################################################################################ 30 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS 31 | ################################################################################ 32 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt" 33 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/temp-storage-for-end-to-end-tests/dataflow-ml-starter/result_gpu.txt" -------------------------------------------------------------------------------- /tests/sample.env.tf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ### PYTHON SDK SETTINGS 3 | ################################################################################ 4 | PYTHON_VERSION=3.8 5 | BEAM_VERSION=2.48.0 6 | DOCKERFILE_TEMPLATE=tensorflow_gpu.Dockerfile 7 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev" 8 | ################################################################################ 9 | ### GCP SETTINGS 10 | ################################################################################ 11 | PROJECT_ID=apache-beam-testing 12 | REGION=us-central1 13 | ZONE=us-central1-f 14 | DISK_SIZE_GB=50 15 | MACHINE_TYPE=n1-standard-2 16 | VM_NAME=beam-ml-starter-gpu 17 | ################################################################################ 18 | ### DATAFLOW JOB SETTINGS 19 | ################################################################################ 20 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests 21 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests 22 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/tf_gpu:test 23 | 
SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver" 24 | ################################################################################ 25 | ### DATAFLOW JOB MODEL SETTINGS 26 | ################################################################################ 27 | #TF_MODEL_URI: only support TF2 models (https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2) 28 | TF_MODEL_URI=https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4 29 | ################################################################################ 30 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS 31 | ################################################################################ 32 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt" 33 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/temp-storage-for-end-to-end-tests/dataflow-ml-starter/result_gpu.txt" -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # standard libraries 16 | from pathlib import Path 17 | 18 | # third party libraries 19 | import apache_beam as beam 20 | 21 | # Dataflow ML libraries 22 | # dfml libraries 23 | from my_project.config import ModelConfig, SinkConfig, SourceConfig 24 | from my_project.pipeline import build_pipeline 25 | 26 | DATA_FILE_PATH = Path(__file__).parent.parent / "data" 27 | 28 | 29 | def test_build_pipeline(): 30 | model_config = ModelConfig( 31 | model_state_dict_path="gs://apache-beam-ml/models/torchvision.models.resnet101.pth", 32 | model_class_name="resnet101", 33 | model_params={"num_classes": 1000}, 34 | ) 35 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt")) 36 | sink_config = SinkConfig(output="beam-output/my_output.txt") 37 | 38 | p = beam.Pipeline() 39 | build_pipeline(p, source_config=source_config, sink_config=sink_config, model_config=model_config) 40 | 41 | 42 | def test_build_pipeline_with_tf(): 43 | model_config = ModelConfig( 44 | tf_model_uri="https://tfhub.dev/google/imagenet/mobilenet_v1_075_192/quantops/classification/3", 45 | ) 46 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt")) 47 | sink_config = SinkConfig(output="beam-output/my_output.txt") 48 | 49 | p = beam.Pipeline() 50 | build_pipeline(p, source_config=source_config, sink_config=sink_config, model_config=model_config) 51 | 52 | 53 | def test_source_config_streaming(): 54 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt")) 55 | assert source_config.streaming is False 56 | source_config = SourceConfig(input="projects/apache-beam-testing/topics/Imagenet_openimage_50k_benchmark") 57 | assert source_config.streaming is True 58 | --------------------------------------------------------------------------------