├── .env.template
├── .flake8
├── .github
│   └── workflows
│       ├── docker.yml
│       ├── docker_test.yml
│       └── pr.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── data
│   └── openimage_10.txt
├── flex
│   └── metadata.json
├── my_project
│   ├── __init__.py
│   ├── config.py
│   ├── pipeline.py
│   └── run.py
├── pyproject.toml
├── pytorch_gpu.Dockerfile
├── requirements.dev.txt
├── requirements.prod.txt
├── scripts
│   ├── check-beam.sh
│   ├── check-pipeline.sh
│   ├── check-tf-on-gpu.sh
│   ├── check-torch-on-gpu.sh
│   ├── create-gpu-vm.sh
│   └── get_beam_version.py
├── setup.py
├── tensor_rt.Dockerfile
├── tensorflow_gpu.Dockerfile
├── tensorflow_gpu.flex.Dockerfile
└── tests
    ├── sample.env.pytorch
    ├── sample.env.tf
    └── test_pipeline.py
/.env.template:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ### PYTHON/SDK/DOCKER SETTINGS
3 | ################################################################################
4 | ## PyTorch + Py3.10 + Beam 2.47.0
5 | PYTHON_VERSION=3.10
6 | BEAM_VERSION=2.47.0
7 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile
8 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev"
9 | ## PyTorch + TensorRT + Py3.8 + Beam 2.46.0
10 | #PYTHON_VERSION=3.8
11 | #BEAM_VERSION=2.46.0
12 | #DOCKERFILE_TEMPLATE=tensor_rt.Dockerfile
13 | ################################################################################
14 | ### GCP SETTINGS
15 | ################################################################################
16 | PROJECT_ID=your-gcp-project-id
17 | REGION=your-region-to-run-dataflow-jobs
18 | ZONE=your-zone-to-run-vm
19 | DISK_SIZE_GB=50
20 | MACHINE_TYPE=n1-standard-2
21 | VM_NAME=beam-ml-starter-gpu
22 | ################################################################################
23 | ### DATAFLOW JOB SETTINGS
24 | ################################################################################
25 | STAGING_LOCATION=your-gcs-bucket-for-staging-files
26 | TEMP_LOCATION=your-gcs-bucket-for-temp-files
27 | CUSTOM_CONTAINER_IMAGE=your-gcr-image-uri-for-custom-container
28 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver"
29 | ################################################################################
30 | ### DATAFLOW JOB MODEL SETTINGS
31 | ################################################################################
32 | ### PYTORCH MODEL EXAMPLES
33 | ## mobilenet_v2
34 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth"
35 | MODEL_NAME=mobilenet_v2
36 | ## resnet101
37 | #MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.resnet101.pth"
38 | #MODEL_NAME=resnet101
39 | ### TF MODEL URI EXAMPLES
40 | #TF_MODEL_URI: only supports TF2 models (https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2)
41 | #TF_MODEL_URI=https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4
42 | ################################################################################
43 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS
44 | ################################################################################
45 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt"
46 | OUTPUT_DATA=your-gcs-bucket-for-saving-prediction-results
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | max-complexity = 40
4 | ignore =
5 | E203
6 | W503
7 | exclude =
8 | .eggs
9 | .git
10 | .tox
11 | __pycache__
12 | build
13 | dist
14 | venv
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | name: Build and push Docker image to GCP Artifact Registry
16 |
17 | on:
18 | workflow_dispatch:
19 | push:
20 | branches:
21 | - main
22 | schedule:
23 | # Every Monday at 1PM UTC (9AM EDT)
24 | - cron: "0 13 * * 1"
25 |
26 | jobs:
27 | build-and-push:
28 | runs-on: ubuntu-latest
29 |
30 | steps:
31 | - name: Free Disk Space (Ubuntu)
32 | uses: jlumbroso/free-disk-space@main
33 | with:
34 | # setting this to "true" might remove tools that are actually needed,
35 | # but it frees about 6 GB
36 | tool-cache: false
37 |
38 | android: true
39 | dotnet: true
40 | haskell: true
41 | large-packages: false
42 | docker-images: true
43 | swap-storage: true
44 | - name: Checkout
45 | uses: actions/checkout@v3
46 | - id: "auth"
47 | name: Authenticate to Google Cloud
48 | uses: google-github-actions/auth@v1.1.1
49 | with:
50 | credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
51 | token_format: access_token
52 | - name: Docker login
53 | uses: "docker/login-action@v1"
54 | with:
55 | registry: "us-docker.pkg.dev"
56 | username: "oauth2accesstoken"
57 | password: "${{ steps.auth.outputs.access_token }}"
58 | - name: Set up Python 3.8
59 | uses: actions/setup-python@v4
60 | with:
61 | python-version: "3.8"
62 | - name: Init env
63 | run: |
64 | cp tests/sample.env.tf .env
65 | echo '${{ steps.auth.outputs.access_token }}' | docker login -u oauth2accesstoken --password-stdin https://us-docker.pkg.dev
66 | make init
67 | - name: Build and push Docker image
68 | run: |
69 | make docker
70 | - name: Test Docker image
71 | run: |
72 | make run-df-gpu
73 |
--------------------------------------------------------------------------------
/.github/workflows/docker_test.yml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | name: Build and push Docker image to GCP Artifact Registry with the latest Beam
16 |
17 | on:
18 | workflow_dispatch:
19 |
20 | jobs:
21 | build-and-push:
22 | runs-on: ubuntu-latest
23 |
24 | steps:
25 | - name: Free Disk Space (Ubuntu)
26 | uses: jlumbroso/free-disk-space@main
27 | with:
28 | # setting this to "true" might remove tools that are actually needed,
29 | # but it frees about 6 GB
30 | tool-cache: false
31 |
32 | android: true
33 | dotnet: true
34 | haskell: true
35 | large-packages: false
36 | docker-images: true
37 | swap-storage: true
38 | - name: Checkout
39 | uses: actions/checkout@v3
40 | - id: "auth"
41 | name: Authenticate to Google Cloud
42 | uses: google-github-actions/auth@v1.1.1
43 | with:
44 | credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
45 | token_format: access_token
46 | - name: Docker login
47 | uses: "docker/login-action@v1"
48 | with:
49 | registry: "us-docker.pkg.dev"
50 | username: "oauth2accesstoken"
51 | password: "${{ steps.auth.outputs.access_token }}"
52 | - name: Set up Python 3.10
53 | uses: actions/setup-python@v4
54 | with:
55 | python-version: "3.10"
56 | - name: Init env with the test Beam version and Docker URI
57 | run: |
58 | cp tests/sample.env.pytorch .env
59 | make init-venv
60 | ./venv/bin/pip install requests packaging
61 | make test-latest-env
62 | sed -i '/CUSTOM_CONTAINER_IMAGE=/d' .env
63 | echo -e "\n" >> .env
64 | echo "CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/pytorch_gpu:test-beam" >> .env
65 | echo '${{ steps.auth.outputs.access_token }}' | docker login -u oauth2accesstoken --password-stdin https://us-docker.pkg.dev
66 | make init
67 | - name: Build and push Docker image
68 | run: |
69 | make docker
70 | - name: Test Docker image
71 | run: |
72 | make run-df-gpu
73 |
--------------------------------------------------------------------------------
/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | name: Run basic tests with Python 3.8
16 |
17 | on: [push, pull_request, workflow_dispatch]
18 |
19 | jobs:
20 | tests:
21 | runs-on: ubuntu-latest
22 |
23 | steps:
24 | - uses: actions/checkout@v3
25 | - name: Set up Python 3.8
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: "3.8"
29 | - name: Init env
30 | run: |
31 | cp tests/sample.env.tf .env
32 | make init
33 | - name: Run local tests
34 | run: |
35 | make test
36 | - name: Run DirectRunner with TF
37 | run: |
38 | # tf model
39 | make run-direct
40 | test -f beam-output/beam_test_out.txt && echo "DirectRunner ran successfully!" || (echo "Cannot find beam-output/beam_test_out.txt!" && exit 1)
41 | - name: Run DirectRunner with PyTorch
42 | run: |
43 | # torch model
44 | sed -i '/TF_MODEL_URI=/d' .env
45 | echo -e "\n" >> .env
46 | echo "MODEL_STATE_DICT_PATH=gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth" >> .env
47 | echo -e "\n" >> .env
48 | echo "MODEL_NAME=mobilenet_v2" >> .env
49 | make run-direct
50 | test -f beam-output/beam_test_out.txt && echo "DirectRunner ran successfully!" || (echo "Cannot find beam-output/beam_test_out.txt!" && exit 1)
51 | # restore .env
52 | cp tests/sample.env.tf .env
53 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # beam temp
16 | beam-temp-*
17 | beam-output/
18 | Dockerfile
19 | requirements.txt
20 |
21 | # sys
22 | .DS_Store
23 |
24 | # Byte-compiled / optimized / DLL files
25 | __pycache__/
26 | *.py[cod]
27 | *$py.class
28 |
29 | # C extensions
30 | *.so
31 |
32 | # Distribution / packaging
33 | .Python
34 | build/
35 | develop-eggs/
36 | dist/
37 | downloads/
38 | eggs/
39 | .eggs/
40 | lib/
41 | lib64/
42 | parts/
43 | sdist/
44 | var/
45 | wheels/
46 | share/python-wheels/
47 | *.egg-info/
48 | .installed.cfg
49 | *.egg
50 | MANIFEST
51 |
52 | # PyInstaller
53 | # Usually these files are written by a python script from a template
54 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
55 | *.manifest
56 | *.spec
57 |
58 | # Installer logs
59 | pip-log.txt
60 | pip-delete-this-directory.txt
61 |
62 | # Unit test / coverage reports
63 | htmlcov/
64 | .tox/
65 | .nox/
66 | .coverage
67 | .coverage.*
68 | .cache
69 | nosetests.xml
70 | coverage.xml
71 | *.cover
72 | *.py,cover
73 | .hypothesis/
74 | .pytest_cache/
75 | cover/
76 |
77 | # Translations
78 | *.mo
79 | *.pot
80 |
81 | # Django stuff:
82 | *.log
83 | local_settings.py
84 | db.sqlite3
85 | db.sqlite3-journal
86 |
87 | # Flask stuff:
88 | instance/
89 | .webassets-cache
90 |
91 | # Scrapy stuff:
92 | .scrapy
93 |
94 | # Sphinx documentation
95 | docs/_build/
96 |
97 | # PyBuilder
98 | .pybuilder/
99 | target/
100 |
101 | # Jupyter Notebook
102 | .ipynb_checkpoints
103 |
104 | # IPython
105 | profile_default/
106 | ipython_config.py
107 |
108 | # pyenv
109 | # For a library or package, you might want to ignore these files since the code is
110 | # intended to run in multiple environments; otherwise, check them in:
111 | # .python-version
112 |
113 | # pipenv
114 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
115 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
116 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
117 | # install all needed dependencies.
118 | #Pipfile.lock
119 |
120 | # poetry
121 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
122 | # This is especially recommended for binary packages to ensure reproducibility, and is more
123 | # commonly ignored for libraries.
124 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
125 | #poetry.lock
126 |
127 | # pdm
128 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
129 | #pdm.lock
130 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
131 | # in version control.
132 | # https://pdm.fming.dev/#use-with-ide
133 | .pdm.toml
134 |
135 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
136 | __pypackages__/
137 |
138 | # Celery stuff
139 | celerybeat-schedule
140 | celerybeat.pid
141 |
142 | # SageMath parsed files
143 | *.sage.py
144 |
145 | # Environments
146 | .env*
147 | .venv
148 | env/
149 | venv/
150 | ENV/
151 | env.bak/
152 | venv.bak/
153 |
154 | # vscode
155 | .vscode
156 |
157 | # Spyder project settings
158 | .spyderproject
159 | .spyproject
160 |
161 | # Rope project settings
162 | .ropeproject
163 |
164 | # mkdocs documentation
165 | /site
166 |
167 | # mypy
168 | .mypy_cache/
169 | .dmypy.json
170 | dmypy.json
171 |
172 | # Pyre type checker
173 | .pyre/
174 |
175 | # pytype static type analyzer
176 | .pytype/
177 |
178 | # Cython debug symbols
179 | cython_debug/
180 |
181 | # PyCharm
182 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184 | # and can be added to the global gitignore or merged into this file. For a more nuclear
185 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186 | #.idea/
187 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | [settings]
16 | sections=FUTURE,STDLIB,THIRDPARTY,DFML,FIRSTPARTY,LOCALFOLDER
17 | import_heading_dfml=Dataflow ML libraries
18 | import_heading_stdlib=standard libraries
19 | import_heading_thirdparty=third party libraries
20 | include_trailing_comma=True
21 | indent=' '
22 | known_dfml=my_project
23 | dedup_headings=True
24 | line_length=120
25 | multi_line_output=3
26 | skip=./venv/,./venv-docs/,./.git/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | exclude: ^docs/notebooks/
16 | repos:
17 | - repo: https://github.com/ambv/black
18 | rev: 23.3.0
19 | hooks:
20 | - id: black
21 | args: ["--config=pyproject.toml", "--check", "--diff"]
22 | - repo: https://github.com/pycqa/flake8
23 | rev: "6.0.0"
24 | hooks:
25 | - id: flake8
26 | args: ["--config=.flake8"]
27 | - repo: https://github.com/timothycrosley/isort
28 | rev: 5.12.0
29 | hooks:
30 | - id: isort
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We would love to accept your patches and contributions to this project.
4 |
5 | ## Before you begin
6 |
7 | ### Sign our Contributor License Agreement
8 |
9 | Contributions to this project must be accompanied by a
10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
11 | You (or your employer) retain the copyright to your contribution; this simply
12 | gives us permission to use and redistribute your contributions as part of the
13 | project.
14 |
15 | If you or your current employer have already signed the Google CLA (even if it
16 | was for a different project), you probably don't need to do it again.
17 |
18 | Visit <https://cla.developers.google.com/> to see your current agreements or to
19 | sign a new one.
20 |
21 | ### Review our Community Guidelines
22 |
23 | This project follows [Google's Open Source Community
24 | Guidelines](https://opensource.google/conduct/).
25 |
26 | ## Contribution process
27 |
28 | ### Code Reviews
29 |
30 | All submissions, including submissions by project members, require review. We
31 | use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests)
32 | for this purpose.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | SILENT:
16 | .PHONY:
17 | .DEFAULT_GOAL := help
18 |
19 | # Load environment variables from .env file
20 | TF_MODEL_URI :=
21 | include .env
22 | export
23 |
24 | define PRINT_HELP_PYSCRIPT
25 | import re, sys # isort:skip
26 |
27 | matches = []
28 | for line in sys.stdin:
29 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
30 | if match:
31 | matches.append(match.groups())
32 |
33 | for target, help in sorted(matches):
34 | print(" %-25s %s" % (target, help))
35 | endef
36 | export PRINT_HELP_PYSCRIPT
37 |
38 | PYTHON = python$(PYTHON_VERSION)
39 |
40 | ifndef TF_MODEL_URI
41 | MODEL_ENV := "TORCH"
42 | else
43 | MODEL_ENV := "TF"
44 | endif
45 |
46 | help: ## Print this help
47 | @echo
48 | @echo " make targets:"
49 | @echo
50 | @$(PYTHON) -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
51 |
52 | test-latest-env: ## Replace the Beam version with the latest version (including release candidates)
53 | $(eval LATEST_VERSION=$(shell ./venv/bin/python3 scripts/get_beam_version.py))
54 | @echo $(LATEST_VERSION)
55 | @sed 's/BEAM_VERSION=.*/BEAM_VERSION=$(LATEST_VERSION)/g' .env > .env.new && mv .env.new .env
56 |
57 | init-venv: ## Create virtual environment in venv folder
58 | @$(PYTHON) -m venv venv
59 |
60 | init: init-venv ## Init virtual environment
61 | @./venv/bin/python3 -m pip install -U pip
62 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g" requirements.prod.txt > requirements.txt)
63 | @./venv/bin/python3 -m pip install -r requirements.txt
64 | @./venv/bin/python3 -m pip install -r requirements.dev.txt
65 | @./venv/bin/python3 -m pre_commit install --install-hooks --overwrite
66 | @mkdir -p beam-output
67 | @echo "use 'source venv/bin/activate' to activate venv "
68 | @./venv/bin/python3 -m pip install -e .
69 |
70 | format: ## Run formatter on source code
71 | @./venv/bin/python3 -m black --config=pyproject.toml .
72 |
73 | lint: ## Run linter on source code
74 | @./venv/bin/python3 -m black --config=pyproject.toml --check .
75 | @./venv/bin/python3 -m flake8 --config=.flake8 .
76 |
77 | clean-lite: ## Remove pycache files, pytest files, etc
78 | @rm -rf build dist .cache .coverage .coverage.* *.egg-info
79 | @find . -name .coverage | xargs rm -rf
80 | @find . -name .pytest_cache | xargs rm -rf
81 | @find . -name .tox | xargs rm -rf
82 | @find . -name __pycache__ | xargs rm -rf
83 | 	@find . -name "*.egg-info" | xargs rm -rf
84 |
85 | clean: clean-lite ## Remove virtual environment, downloaded models, etc
86 | @rm -rf venv
87 | @echo "run 'make init'"
88 |
89 | test: lint ## Run tests
90 | ./venv/bin/pytest -s -vv --cov=my_project --cov-fail-under=50 tests/
91 |
92 | run-direct: ## Run a local test with DirectRunner
93 | @rm -f beam-output/beam_test_out.txt
94 | ifeq ($(MODEL_ENV), "TORCH")
95 | time ./venv/bin/python3 -m my_project.run \
96 | --input data/openimage_10.txt \
97 | --output beam-output/beam_test_out.txt \
98 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \
99 | --model_name $(MODEL_NAME)
100 | else
101 | time ./venv/bin/python3 -m my_project.run \
102 | --input data/openimage_10.txt \
103 | --output beam-output/beam_test_out.txt \
104 | --tf_model_uri $(TF_MODEL_URI)
105 | endif
106 |
107 | docker: ## Build a custom docker image and push it to Artifact Registry
108 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g; s|\$${PYTHON_VERSION}|$(PYTHON_VERSION)|g" ${DOCKERFILE_TEMPLATE} > Dockerfile)
109 | docker build --platform linux/amd64 -t $(CUSTOM_CONTAINER_IMAGE) -f Dockerfile .
110 | docker push $(CUSTOM_CONTAINER_IMAGE)
111 |
112 | run-df-gpu: ## Run a Dataflow job using the custom container with GPUs
113 | $(eval JOB_NAME := beam-ml-starter-gpu-$(shell date +%s)-$(shell echo $$$$))
114 | ifeq ($(MODEL_ENV), "TORCH")
115 | time ./venv/bin/python3 -m my_project.run \
116 | --runner DataflowRunner \
117 | --job_name $(JOB_NAME) \
118 | --project $(PROJECT_ID) \
119 | --region $(REGION) \
120 | --machine_type $(MACHINE_TYPE) \
121 | --disk_size_gb $(DISK_SIZE_GB) \
122 | --staging_location $(STAGING_LOCATION) \
123 | --temp_location $(TEMP_LOCATION) \
124 | --setup_file ./setup.py \
125 | --device GPU \
126 | --dataflow_service_option $(SERVICE_OPTIONS) \
127 | --number_of_worker_harness_threads 1 \
128 | --experiments=disable_worker_container_image_prepull \
129 | --experiments=use_pubsub_streaming \
130 | --sdk_container_image $(CUSTOM_CONTAINER_IMAGE) \
131 | --sdk_location container \
132 | --input $(INPUT_DATA) \
133 | --output $(OUTPUT_DATA) \
134 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \
135 | --model_name $(MODEL_NAME)
136 | else
137 | time ./venv/bin/python3 -m my_project.run \
138 | --runner DataflowRunner \
139 | --job_name $(JOB_NAME) \
140 | --project $(PROJECT_ID) \
141 | --region $(REGION) \
142 | --machine_type $(MACHINE_TYPE) \
143 | --disk_size_gb $(DISK_SIZE_GB) \
144 | --staging_location $(STAGING_LOCATION) \
145 | --temp_location $(TEMP_LOCATION) \
146 | --setup_file ./setup.py \
147 | --device GPU \
148 | --dataflow_service_option $(SERVICE_OPTIONS) \
149 | --number_of_worker_harness_threads 1 \
150 | --experiments=disable_worker_container_image_prepull \
151 | --experiments=use_pubsub_streaming \
152 | --sdk_container_image $(CUSTOM_CONTAINER_IMAGE) \
153 | --sdk_location container \
154 | --input $(INPUT_DATA) \
155 | --output $(OUTPUT_DATA) \
156 | --tf_model_uri $(TF_MODEL_URI)
157 | endif
158 |
159 | run-df-cpu: ## Run a Dataflow job with CPUs and without Custom Container
160 | @$(shell sed "s|\$${BEAM_VERSION}|$(BEAM_VERSION)|g" requirements.txt > beam-output/requirements.txt)
161 | @$(eval JOB_NAME := beam-ml-starter-cpu-$(shell date +%s)-$(shell echo $$$$))
162 | ifeq ($(MODEL_ENV), "TORCH")
163 | time ./venv/bin/python3 -m my_project.run \
164 | --runner DataflowRunner \
165 | --job_name $(JOB_NAME) \
166 | --project $(PROJECT_ID) \
167 | --region $(REGION) \
168 | --machine_type $(MACHINE_TYPE) \
169 | --disk_size_gb $(DISK_SIZE_GB) \
170 | --staging_location $(STAGING_LOCATION) \
171 | --temp_location $(TEMP_LOCATION) \
172 | --requirements_file requirements.txt \
173 | --setup_file ./setup.py \
174 | --input $(INPUT_DATA) \
175 | --output $(OUTPUT_DATA) \
176 | --model_state_dict_path $(MODEL_STATE_DICT_PATH) \
177 | --model_name $(MODEL_NAME)
178 | else
179 | time ./venv/bin/python3 -m my_project.run \
180 | --runner DataflowRunner \
181 | --job_name $(JOB_NAME) \
182 | --project $(PROJECT_ID) \
183 | --region $(REGION) \
184 | --machine_type $(MACHINE_TYPE) \
185 | --disk_size_gb $(DISK_SIZE_GB) \
186 | --staging_location $(STAGING_LOCATION) \
187 | --temp_location $(TEMP_LOCATION) \
188 | --requirements_file requirements.txt \
189 | --setup_file ./setup.py \
190 | --input $(INPUT_DATA) \
191 | --output $(OUTPUT_DATA) \
192 | --tf_model_uri $(TF_MODEL_URI)
193 | endif
194 |
195 | create-vm: ## Create a VM with GPU to test the docker image
196 | @./scripts/create-gpu-vm.sh
197 |
198 | delete-vm: ## Delete a VM
199 | gcloud compute instances delete $(VM_NAME) --project $(PROJECT_ID) --zone $(ZONE) --quiet
200 |
201 | check-beam: ## Check whether Beam is installed on GPU using VM with Custom Container
202 | @./scripts/check-beam.sh
203 |
204 | check-tf-gpu: ## Check whether Tensorflow works on GPU using VM with Custom Container
205 | @./scripts/check-tf-on-gpu.sh
206 |
207 | check-torch-gpu: ## Check whether PyTorch works on GPU using VM with Custom Container
208 | @./scripts/check-torch-on-gpu.sh
209 |
210 | check-pipeline: ## Check whether the Beam pipeline can run on GPU using VM with Custom Container and DirectRunner
211 | @./scripts/check-pipeline.sh
212 |
213 | create-flex-template: ## Create a Flex Template file using a Flex Template custom container
214 | gcloud dataflow flex-template build $(TEMPLATE_FILE_GCS_PATH) \
215 | --image $(CUSTOM_CONTAINER_IMAGE) \
216 | --metadata-file ./flex/metadata.json \
217 | --sdk-language "PYTHON" \
218 | --staging-location $(STAGING_LOCATION) \
219 | --temp-location $(TEMP_LOCATION) \
220 | --project $(PROJECT_ID) \
221 | --worker-region $(REGION) \
222 | --worker-machine-type $(MACHINE_TYPE)
223 |
224 | run-df-gpu-flex: ## Run a Dataflow job using the Flex Template
225 | $(eval JOB_NAME := beam-ml-starter-gpu-flex-$(shell date +%s)-$(shell echo $$$$))
226 | ifeq ($(MODEL_ENV), "TORCH")
227 | gcloud dataflow flex-template run $(JOB_NAME) \
228 | --template-file-gcs-location $(TEMPLATE_FILE_GCS_PATH) \
229 | --project $(PROJECT_ID) \
230 | --region $(REGION) \
231 | --worker-machine-type $(MACHINE_TYPE) \
232 | --additional-experiments disable_worker_container_image_prepull \
233 | --parameters number_of_worker_harness_threads=1 \
234 | --parameters sdk_location=container \
235 | --parameters sdk_container_image=$(CUSTOM_CONTAINER_IMAGE) \
236 | --parameters dataflow_service_option=$(SERVICE_OPTIONS) \
237 | --parameters input=$(INPUT_DATA) \
238 | --parameters output=$(OUTPUT_DATA) \
239 | --parameters device=GPU \
240 | --parameters model_state_dict_path=$(MODEL_STATE_DICT_PATH) \
241 | --parameters model_name=$(MODEL_NAME)
242 | else
243 | gcloud dataflow flex-template run $(JOB_NAME) \
244 | --template-file-gcs-location $(TEMPLATE_FILE_GCS_PATH) \
245 | --project $(PROJECT_ID) \
246 | --region $(REGION) \
247 | --worker-machine-type $(MACHINE_TYPE) \
248 | --additional-experiments disable_worker_container_image_prepull \
249 | --parameters number_of_worker_harness_threads=1 \
250 | --parameters sdk_location=container \
251 | --parameters sdk_container_image=$(CUSTOM_CONTAINER_IMAGE) \
252 | --parameters dataflow_service_option=$(SERVICE_OPTIONS) \
253 | --parameters input=$(INPUT_DATA) \
254 | --parameters output=$(OUTPUT_DATA) \
255 | --parameters device=GPU \
256 | --parameters tf_model_uri=$(TF_MODEL_URI)
257 | endif
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dataflow ML Starter Project
2 |
3 | ## Summary
4 | This repo contains a simple Beam RunInference project that demonstrates how to develop and test a Beam pipeline using DirectRunner
5 | and how to launch the production job using DataflowRunner on either CPUs or GPUs. It can serve as a boilerplate for creating a new Dataflow ML project.
6 |
7 | **This is not an officially supported Google product**.
8 |
9 | ## Prerequisites
10 |
11 | * conda
12 | * git
13 | * make
14 | * docker
15 | * gcloud
16 | * python3-venv
17 |
18 | ```bash
19 | sudo apt-get update
20 | sudo apt-get install -y python3-venv git make time wget
21 | ```
22 | Install Docker on Debian: https://docs.docker.com/engine/install/debian/
23 | To run Docker without sudo:
24 | ```bash
25 | sudo groupadd docker
26 | sudo usermod -aG docker $USER
27 | newgrp docker
28 | ```
29 |
30 | ## Directory structure
31 | ```
32 | .
33 | ├── LICENSE
34 | ├── .env.template <- A configuration template file to define environment-specific variables
35 | ├── Makefile <- Makefile with commands and type `make` to get the command list
36 | ├── README.md <- The top-level README for developers using this project
37 | ├── data <- Any data for local development and testing
38 | │   └── openimage_10.txt <- Sample test data that contains the GCS file path for each image
39 | ├── pyproject.toml <- The TOML format Python project configuration file
40 | ├── requirements.dev.txt <- Packages for the development such as `pytest`
41 | ├── requirements.prod.txt <- Packages for the production environment and produces `requirements.txt`
42 | ├── scripts <- utility bash scripts
43 | ├── setup.py <- Used in `python setup.py sdist` to create the multi-file python package
44 | ├── my_project <- Source code for use in this project, also your python package module name
45 | │ ├── __init__.py <- Makes my_project a Python package
46 | │ ├── config.py <- `pydantic` model classes to define sources, sinks, and models
47 | │ ├── pipeline.py <- Builds the Beam RunInference pipeline
48 | │ └── run.py <- A run module to parse the command options and run the Beam pipeline
49 | ├── tensor_rt.Dockerfile <- A Dockerfile to create a custom container with TensorRT
50 | └── tests <- Tests to cover local developments
51 | └── test_pipeline.py
52 | ```
53 |
54 | ## User Guide
55 |
56 | **This process is only tested on GCE VMs with Debian.**
57 |
58 | ### Step 1: Clone this repo and edit .env
59 |
60 | ```bash
61 | git clone https://github.com/google/dataflow-ml-starter.git
62 | cd dataflow-ml-starter
63 | cp .env.template .env
64 | ```
65 | Use your editor to fill in the information in the `.env` file.
66 |
67 | If you want to try other PyTorch models under `gs://apache-beam-ml/models/`, list them with
68 | ```bash
69 | gsutil ls gs://apache-beam-ml/models/
70 | ```
71 | and then edit `config.py` to add the new model names.
72 |
73 | It is highly recommended to run through this guide once using `mobilenet_v2` for image classification.
74 |
75 | All the useful actions can be triggered using `make`:
76 | ```console
77 | $ make
78 |
79 | make targets:
80 |
81 | check-beam Check whether Beam is installed on GPU using VM with Custom Container
82 | check-pipeline Check whether the Beam pipeline can run on GPU using VM with Custom Container and DirectRunner
83 | check-tf-gpu Check whether Tensorflow works on GPU using VM with Custom Container
84 | check-torch-gpu Check whether PyTorch works on GPU using VM with Custom Container
85 | clean Remove virtual environment, downloaded models, etc
86 | clean-lite Remove pycache files, pytest files, etc
87 | create-flex-template Create a Flex Template file using a Flex Template custom container
88 | create-vm Create a VM with GPU to test the docker image
89 | delete-vm Delete a VM
90 | docker Build a custom docker image and push it to Artifact Registry
91 | format Run formatter on source code
92 | help Print this help
93 | init Init virtual environment
94 | init-venv Create virtual environment in venv folder
95 | lint Run linter on source code
96 | run-df-cpu Run a Dataflow job with CPUs and without Custom Container
97 | run-df-gpu Run a Dataflow job using the custom container with GPUs
98 | run-df-gpu-flex Run a Dataflow job using the Flex Template
99 | run-direct Run a local test with DirectRunner
100 | test Run tests
101 | test-latest-env           Replace the Beam version with the latest version (including release candidates)
102 | ```
103 |
104 | ### Pipeline Details
105 |
106 | This project contains a simple RunInference Beam pipeline,
107 | ```
108 | Read the GCS file that contains image GCS paths (beam.io.ReadFromText) ->
109 | Pre-process the input image, run a PyTorch or TensorFlow image classification model, post-process the results ->
110 | Write all predictions back to the GCS output file
111 | ```
112 | The input image data is created from the ImageNet images.
113 |
114 | The overall code flow works like this:
115 |
116 | * `.env` defines the environment variables, such as the Torch or TF model, the model name, the Dockerfile template, etc.
117 | * `Makefile` reads these environment variables from `.env` and, depending on the make target, runs tests, builds Docker images, or launches Dataflow jobs on CPUs or GPUs.
118 | * `run.py` is called by the `Makefile` targets to parse the input arguments and set the `ModelConfig`, `SourceConfig`, and `SinkConfig` defined in `config.py`; it then calls `build_pipeline` from `pipeline.py` to build the final Beam pipeline (a sketch of this wiring follows below).
119 |
120 |
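For orientation, here is a minimal sketch of how that wiring might look (the exact signatures in `my_project/run.py` may differ):

```python
# Hypothetical sketch of the run-module wiring; see my_project/run.py for the real code.
import argparse

# third party libraries
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

# Dataflow ML libraries
from my_project.config import ModelConfig, SinkConfig, SourceConfig
from my_project.pipeline import build_pipeline


def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True, help="text file with image GCS paths")
    parser.add_argument("--output", required=True, help="where to write the predictions")
    parser.add_argument("--model_name", default=None)
    parser.add_argument("--model_state_dict_path", default=None)
    parser.add_argument("--tf_model_uri", default=None)
    known_args, pipeline_args = parser.parse_known_args(argv)

    # pydantic validates the configs before the pipeline is constructed
    model_config = ModelConfig(
        model_name=known_args.model_name,
        model_state_dict_path=known_args.model_state_dict_path,
        tf_model_uri=known_args.tf_model_uri,
    )
    source_config = SourceConfig(input=known_args.input)
    sink_config = SinkConfig(output=known_args.output)

    with beam.Pipeline(options=PipelineOptions(pipeline_args)) as p:
        build_pipeline(p, model_config, source_config, sink_config)


if __name__ == "__main__":
    main()
```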
121 | To customize the pipeline, modify `build_pipeline` in [pipeline.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/pipeline.py). It defines how to read the image data from TextIO, pre-process the images, score them, post-process the predictions,
122 | and finally save the results using TextIO.
123 |
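Schematically, `build_pipeline` has roughly this shape (`create_model_handler`, `preprocess_image`, and `format_prediction` are placeholder names, not the actual helpers):

```python
# Illustrative shape of the batch pipeline; the real implementation is in my_project/pipeline.py.
import apache_beam as beam
from apache_beam.ml.inference.base import RunInference


def build_pipeline(pipeline, model_config, source_config, sink_config):
    # Wrap the model described by model_config in a PyTorch or TF model handler.
    model_handler = create_model_handler(model_config)
    _ = (
        pipeline
        | "ReadImageGcsPaths" >> beam.io.ReadFromText(source_config.input)
        | "PreprocessImages" >> beam.Map(preprocess_image)  # decode, resize, normalize
        | "RunInference" >> RunInference(model_handler)
        | "PostprocessPredictions" >> beam.Map(format_prediction)  # e.g. top-1 label
        | "WriteResults" >> beam.io.WriteToText(sink_config.output)
    )
```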
124 | [config.py](https://github.com/google/dataflow-ml-starter/blob/main/my_project/config.py) contains a set of `pydantic` models that specify and validate the configurations for sources, sinks, and models. Users can easily add more PyTorch classification models; more examples can be found [here](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference).
125 |
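For instance, a validated model config in the spirit of `config.py` might look like this (field and class names here are assumptions based on the pipeline options above, not the exact contents of the file):

```python
# Hypothetical sketch of a pydantic config model; see my_project/config.py for the real classes.
from typing import Optional

# third party libraries
from pydantic import BaseModel, root_validator

KNOWN_TORCH_MODELS = {"mobilenet_v2", "resnet101"}  # extend this set for new models


class ModelConfig(BaseModel):
    model_name: Optional[str] = None             # e.g. "mobilenet_v2"
    model_state_dict_path: Optional[str] = None  # GCS path to the .pth weights
    tf_model_uri: Optional[str] = None           # TF Hub URI (TF2 models only)
    device: str = "CPU"                          # "CPU" or "GPU"

    @root_validator
    def check_model_settings(cls, values):
        if values.get("tf_model_uri"):
            return values  # a TF model needs no torch settings
        if values.get("model_name") not in KNOWN_TORCH_MODELS:
            raise ValueError(f"unknown torch model: {values.get('model_name')}")
        if not values.get("model_state_dict_path"):
            raise ValueError("model_state_dict_path is required for torch models")
        return values
```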
126 | ### `.env` Details
127 |
128 | Most options are configured through the `.env` file.
129 | Below is an example that uses the PyTorch `mobilenet_v2` model for image classification:
130 | ```
131 | ################################################################################
132 | ### PYTHON SDK SETTINGS
133 | ################################################################################
134 | PYTHON_VERSION=3.10
135 | BEAM_VERSION=2.48.0
136 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile
137 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev"
138 | ################################################################################
139 | ### GCP SETTINGS
140 | ################################################################################
141 | PROJECT_ID=apache-beam-testing
142 | REGION=us-central1
143 | DISK_SIZE_GB=50
144 | MACHINE_TYPE=n1-standard-2
145 | VM_NAME=beam-ml-starter-gpu-1
146 | ################################################################################
147 | ### DATAFLOW JOB SETTINGS
148 | ################################################################################
149 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests
150 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests
151 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/xqhu/pytorch_gpu:latest
152 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver"
153 | ################################################################################
154 | ### DATAFLOW JOB MODEL SETTINGS
155 | ################################################################################
156 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth"
157 | MODEL_NAME=mobilenet_v2
158 | ################################################################################
159 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS
160 | ################################################################################
161 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt"
162 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/torch/result_gpu_xqhu.txt"
163 | ```
164 | Most options are self-explanatory. `DOCKERFILE_TEMPLATE` specifies the Dockerfile template used to build the custom container, and `CUSTOM_CONTAINER_IMAGE` is where the built Docker image is stored.
165 | By default, this Dataflow job runs on GPUs (a T4 with the custom container, as defined by `SERVICE_OPTIONS`). `MODEL_STATE_DICT_PATH` and `MODEL_NAME` define the PyTorch model information. For this Beam pipeline, we use GCS buckets for the input and output data.
166 |
167 | ### Custom container
168 | We provide three Dockerfile templates as examples to show how to build a custom container:
169 | |Name|Description|
170 | |---|---|
171 | |tensor_rt.Dockerfile| TensorRT + Python 3.8|
172 | |pytorch_gpu.Dockerfile| Pytorch with GPUs + Python 3.10|
173 | |tensorflow_gpu.Dockerfile | Tensorflow with GPUs + Python 3.8|
174 |
175 | Note: you should keep your local Python environment the same as the one defined in the Dockerfile.
176 | These Dockerfile examples should be customized based on your project requirements.
177 |
178 | ### Step 2: Initialize a venv for your project
179 | ```bash
180 | make init
181 | source venv/bin/activate
182 | ```
183 | Note: make sure the base Python version matches the version defined in `.env`.
184 | The base Python can be managed using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html), e.g.,
185 | ```bash
186 | conda create --name py38 python=3.8
187 | conda activate py38
188 | ```
189 | If anything goes wrong, you can rebuild the `venv`,
190 | ```bash
191 | make clean
192 | make init
193 | ```
194 | To check that the `venv` was created correctly,
195 | ```bash
196 | make test
197 | ```
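For reference, Beam pipelines can be unit-tested entirely locally with `pytest`; a test in the spirit of `tests/test_pipeline.py` might look like this (the transform is illustrative, not the repo's actual test):

```python
# Minimal Beam unit test: run a toy transform on a test pipeline and assert the output.
import apache_beam as beam
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that, equal_to


def test_uppercase_transform():
    with TestPipeline() as p:
        result = p | beam.Create(["cat", "dog"]) | beam.Map(str.upper)
        assert_that(result, equal_to(["CAT", "DOG"]))
```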
198 |
199 | ### Step 3: Test the Beam pipeline using DirectRunner
200 | `DirectRunner` provides a local way to validate that your Beam pipeline works correctly,
201 | ```bash
202 | make run-direct
203 | ```
204 |
205 | ### Step 4: Run the Beam pipeline using DataflowRunner
206 | To run a Dataflow job using CPUs without a custom container, try this:
207 | ```bash
208 | make run-df-cpu
209 | ```
210 | When using `resnet101` to score 50k images, the job took ~30m and cost ~$1.4.
211 | With `mobilenet_v2`, it took ~22m and cost ~$0.5.
212 | Note that the cost and runtime depend on your job settings and the region.
213 |
214 | #### Build Custom Container with GPU supports
215 | Running Dataflow GPU jobs requires building a custom container,
216 | ```bash
217 | make docker
218 | ```
219 | The final Docker image will be pushed to Artifact Registry. For this guide,
220 | we use `tensor_rt.Dockerfile` to demonstrate how to build the container to run the inference on GPUs with TensorRT.
221 | **Note: given the base image issue for TensorRT, only Python 3.8 should be used when running on GPUs.**
222 | You can follow [this doc](https://cloud.google.com/dataflow/docs/gpu/use-gpus#custom-container) to create other GPU containers.
223 |
224 | #### Test Custom Container using GCE VM
225 | It is highly recommended to test your custom container locally before running it with Dataflow,
226 | ```bash
227 | make create-vm
228 | ```
229 | This creates a GCE VM with a T4 GPU and installs the NVIDIA driver. It will take a few minutes.
230 | You can then use this VM to test whether the Docker container is built correctly,
231 | ```bash
232 | # check whether Beam is installed in Custom Container
233 | make check-beam
234 | # check whether Tensorflow can use GPUs in Custom Container
235 | make check-tf-gpu
236 | # check whether PyTorch can use GPUs in Custom Container
237 | make check-torch-gpu
238 | # check whether DirectRunner can run on GPUs in Custom Container
239 | make check-pipeline
240 | ```
241 | Note these commands will take some time to download your container.
242 | You should see outputs similar to these:
243 | ```bash
244 | Checking Python version on VM...
245 | Python 3.8.10
246 | Checking venv exists on VM...
247 | python3-venv/now 3.8.2-0ubuntu2 amd64 [installed,local]
248 | Checking Beam Version on VM...
249 | 2.48.0
250 | Checking Tensorflow on GPU...
251 | [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
252 | Checking PyTorch on GPU...
253 | True
254 | Tesla T4
255 | ...
256 | The DirectRunner run succeeded on GPU!
257 | ```
258 | The last line shows whether the pipeline ran successfully on the VM's GPU inside the custom container.
259 |
260 | After finishing the tests, you can delete this VM,
261 | ```bash
262 | make delete-vm
263 | ```
264 |
265 | #### Run the Beam pipeline using DataflowRunner on GPUs
266 | This runs a Dataflow job with GPUs,
267 | ```bash
268 | make run-df-gpu
269 | ```
270 | When using `resnet101` to score 50k images, the job took ~1h and cost ~$0.5.
271 | With `mobilenet_v2`, it took ~1h and cost ~$0.05.
272 | Note that the cost and runtime depend on your job settings and the region.
273 |
274 | ### Run the Beam pipeline with the Pub/Sub source
275 | When `INPUT_DATA` from the `.env` file defines a valid Pub/Sub topic (e.g., `projects/apache-beam-testing/topics/Imagenet_openimage_50k_benchmark`),
276 | the Beam pipeline is created with the Pub/Sub source and `FixedWindows`, and it switches to `beam.io.fileio.WriteToFiles`, which supports streaming pipelines (see the sketch below).
277 | Note that for this toy example, writing the predictions to a GCS bucket is not efficient, since each output file is quite small (a few bytes).
278 | In practice, you might tune [the autoscaling options](https://cloud.google.com/dataflow/docs/guides/troubleshoot-autoscaling) to optimize the streaming pipeline performance.
279 | Note that the streaming job will run forever until it is canceled or drained.
280 |
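Conceptually, the streaming branch swaps the source and sink along these lines (a sketch; the window size and transform names are illustrative):

```python
# Sketch of the streaming source/sink swap; the RunInference steps stay the same as in batch.
import apache_beam as beam
from apache_beam.io import fileio
from apache_beam.transforms.window import FixedWindows


def streaming_reads_and_writes(pipeline, topic, output_dir, window_sec=10):
    return (
        pipeline
        | "ReadFromPubSub" >> beam.io.ReadFromPubSub(topic=topic)
        | "DecodePath" >> beam.Map(lambda msg: msg.decode("utf-8"))
        | "FixedWindow" >> beam.WindowInto(FixedWindows(window_sec))
        # ... preprocessing + RunInference as in the batch pipeline ...
        | "WriteWindowedFiles" >> fileio.WriteToFiles(path=output_dir)
    )
```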
281 | ### Run the Beam pipeline with Dataflow Flex Templates
282 | If you prefer to package all your code into a custom container so that users can easily access your Beam pipeline,
283 | a Dataflow Flex Template comes in handy: you can create and run a Flex Template job using the Google Cloud CLI or the Google Cloud console.
284 | More importantly, building the Flex Template container from the custom SDK container image can produce a reproducible launch environment that is [compatible with the runtime environment](https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/#make-the-launch-environment-compatible-with-the-runtime-environment).
285 | (More benefits of templates are listed [here](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates#benefits).)
286 |
287 | Since the custom container is already created, it is straightforward to adapt it for Dataflow Flex Templates:
288 | 1. create a [`metadata.json`](https://github.com/google/dataflow-ml-starter/blob/main/flex/metadata.json) file that contains the parameters required by your Beam pipeline. In this example, we can add `input`, `output`, `device`, `model_name`, `model_state_dict_path`, and `tf_model_uri` as the parameters that can be passed in by users. [Here](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates#example-metadata-file) is another example metadata file.
289 | 2. convert the custom container to your template container following [this guide](https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates#use_custom_container_images). [`tensorflow_gpu.flex.Dockerfile`](https://github.com/google/dataflow-ml-starter/blob/main/tensorflow_gpu.flex.Dockerfile) is one example converted from `tensorflow_gpu.Dockerfile`. Only two changes are needed: switch to the Dataflow Template launcher entrypoint and package `my_project` into this container. Change `CUSTOM_CONTAINER_IMAGE` in `.env` and run `make docker` to create the custom container for Flex Templates.
290 | 3. `make create-flex-template` creates a template spec file in a Cloud Storage bucket, at the path defined by the environment variable `TEMPLATE_FILE_GCS_PATH`, that contains all of the information needed to run the job, such as the SDK information and metadata. This calls the `gcloud dataflow flex-template build` CLI.
291 | 4. `make run-df-gpu-flex` runs a Flex Template pipeline using the spec file at `TEMPLATE_FILE_GCS_PATH`. This calls the `gcloud dataflow flex-template run` CLI. Both CLI calls are sketched below.
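Roughly, the two Makefile targets expand to `gcloud` calls like these (values come from your `.env`; the exact flags in the Makefile may differ slightly):
```bash
# Build the template spec file from the custom container image.
gcloud dataflow flex-template build $TEMPLATE_FILE_GCS_PATH \
  --image $CUSTOM_CONTAINER_IMAGE \
  --sdk-language PYTHON \
  --metadata-file flex/metadata.json

# Launch a job from the template spec file.
gcloud dataflow flex-template run "dataflow-ml-starter-flex-$(date +%Y%m%d-%H%M%S)" \
  --template-file-gcs-location $TEMPLATE_FILE_GCS_PATH \
  --region $REGION \
  --parameters input=$INPUT_DATA,output=$OUTPUT_DATA,device=GPU
```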
292 |
293 | More information about Flex Templates can be found [here](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates).
294 |
295 |
296 | ## FAQ
297 |
298 | ### Permission error when using any GCP command
299 | ```bash
300 | gcloud auth login
301 | gcloud auth application-default login
302 | # replace it with the appropriate region
303 | gcloud auth configure-docker us-docker.pkg.dev
304 | # or if you use docker-credential-gcr
305 | docker-credential-gcr configure-docker --registries=us-docker.pkg.dev
306 | ```
307 | Make sure you specify the appropriate region for Artifact Registry.
308 |
309 | ### AttributeError: Can't get attribute 'default_tensor_inference_fn'
310 | ```
311 | AttributeError: Can't get attribute 'default_tensor_inference_fn' on
312 | ```
313 | This error indicates that your Dataflow job is using an outdated Beam SDK. If you use `--sdk_location container`, it means your Docker container contains an outdated Beam SDK.
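To verify which Beam version is baked into your container, you can run a quick local check (mirroring what `scripts/check-beam.sh` does on the VM):
```bash
docker run --rm --entrypoint python $CUSTOM_CONTAINER_IMAGE \
  -c "import apache_beam as beam; print(beam.__version__)"
```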
314 |
315 | ### QUOTA_EXCEEDED
316 | ```
317 | Startup of the worker pool in zone us-central1-a failed to bring up any of the desired 1 workers. Please refer to https://cloud.google.com/dataflow/docs/guides/common-errors#worker-pool-failure for help troubleshooting. QUOTA_EXCEEDED: Instance 'benchmark-tests-pytorch-i-05041052-ufe3-harness-ww4n' creation failed: Quota 'NVIDIA_T4_GPUS' exceeded. Limit: 32.0 in region us-central1.
318 | ```
319 | Check https://cloud.google.com/compute/docs/regions-zones and select another zone that offers your desired machine type and GPU, then relaunch the Dataflow job.
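You can also list the zones where a given accelerator type is available (here `nvidia-tesla-t4`, matching the default `SERVICE_OPTIONS`):
```bash
gcloud compute accelerator-types list --filter="name=nvidia-tesla-t4"
```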
320 |
321 | ### ERROR: failed to solve: failed to fetch anonymous token: unexpected status: 401 Unauthorized
322 | ```
323 | failed to solve with frontend dockerfile.v0: failed to create LLB definition: failed to authorize: rpc error: code = Unknown desc = failed to fetch anonymous token: unexpected status: 401 Unauthorized
324 | ```
325 | Restarting the Docker daemon usually resolves this issue.
326 |
327 | ### Check the built container
328 | ```bash
329 | docker run --rm -it --entrypoint=/bin/bash $CUSTOM_CONTAINER_IMAGE
330 | ```
331 |
332 | ### Errors that can occur when the custom container is not built correctly
333 |
334 | Check the Cloud Logs and pay attention to the INFO-level worker logs:
335 | ```
336 | INFO 2023-05-06T15:13:01.237562007Z The virtual environment was not created successfully because ensurepip is not
337 | INFO 2023-05-06T15:13:01.237601258Z available. On Debian/Ubuntu systems, you need to install the python3-venv
338 | INFO 2023-05-06T15:13:01.237607714Z package using the following command.
339 | ```
340 | or (possibly caused by building the container on macOS)
341 | ```
342 | exec /opt/apache/beam/boot: no such file or directory
343 | ```
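The second error is typically an architecture mismatch, e.g., an `arm64` image built on an Apple Silicon machine. If that is the cause, building the image explicitly for `linux/amd64` should help (a sketch; adapt your `make docker` invocation accordingly):
```bash
docker buildx build --platform linux/amd64 -t $CUSTOM_CONTAINER_IMAGE .
```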
344 |
345 | ## Useful Links
346 | * https://cloud.google.com/dataflow/docs/guides/using-custom-containers#docker
347 | * https://cloud.google.com/dataflow/docs/gpu/use-gpus#custom-container
348 | * https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/
349 | * https://github.com/apache/beam/blob/master/.test-infra/jenkins/job_InferenceBenchmarkTests_Python.groovy
350 | * https://cloud.google.com/dataflow/docs/gpu/troubleshoot-gpus#debug-vm
351 | * https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/dataflow/flex-templates/streaming_beam
352 | * https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates
353 | * https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates#use_custom_container_images
--------------------------------------------------------------------------------
/data/openimage_10.txt:
--------------------------------------------------------------------------------
1 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec63d33df5e91fd.jpg
2 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec64bfcb2d515c9.jpg
3 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec67f239007cb18.jpg
4 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6988f60e8e881.jpg
5 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6bf3c1551224a.jpg
6 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6c1055bae51f5.jpg
7 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6c15d60358c85.jpg
8 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec6ca007effdd80.jpg
9 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec7096df9477315.jpg
10 | gs://apache-beam-ml/datasets/openimage_50k_benchmark/1ec70b2abe194c75.jpg
11 |
--------------------------------------------------------------------------------
/flex/metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Beam RunInference Python flex template",
3 | "description": "Beam RunInference example for python flex template.",
4 | "parameters": [
5 | {
6 | "name": "input",
7 | "label": "Input data",
8 | "helpText": "Input image URI data that could be a GCS bucket or pub/sub topic"
9 | },
10 | {
11 | "name": "output",
12 | "label": "Output GCS bucket path",
13 | "helpText": "A GCS bucket that stores the model predictions"
14 | },
15 | {
16 | "name": "tf_model_uri",
17 | "label": "TensorFlow model URI",
18 | "helpText": "A valid TensorFlow model URI",
19 | "isOptional": true
20 | },
21 | {
22 | "name": "model_name",
23 | "label": "a Pytorch model name",
24 | "helpText": "A model name, e.g. resnet101",
25 | "isOptional": true
26 | },
27 | {
28 | "name": "model_state_dict_path",
29 | "label": "a Pytorch model state path",
30 | "helpText": "Path to the model's state_dict",
31 | "isOptional": true
32 | },
33 | {
34 | "name": "device",
35 | "label": "device to run models",
36 | "helpText": "device could be either CPU or GPU",
37 | "isOptional": true
38 | },
39 | {
40 | "name": "disk_size_gb",
41 | "label": "disk_size_gb",
42 | "helpText": "disk_size_gb for worker",
43 | "isOptional": true
44 | },
45 | {
46 | "name": "dataflow_service_option",
47 | "label": "dataflow_service_option",
48 | "helpText": "dataflow_service_option for worker",
49 | "isOptional": true
50 | }
51 | ]
52 | }
--------------------------------------------------------------------------------
/my_project/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/my_project/config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # standard libraries
16 | import re
17 | from enum import Enum
18 |
19 | # third party libraries
20 | from pydantic import BaseModel, Field, root_validator, validator
21 |
22 |
23 | class ModelName(str, Enum):
24 | RESNET101 = "resnet101"
25 | MOBILENET_V2 = "mobilenet_v2"
26 |
27 |
28 | class ModelConfig(BaseModel):
29 | model_state_dict_path: str = Field(None, description="path to the saved state_dict of the PyTorch model")
30 | model_class_name: ModelName = Field(None, description="Reference to the class definition of the model.")
31 | model_params: dict = Field(
32 | None,
33 | description="Parameters passed to the constructor of the model_class. "
34 | "These will be used to instantiate the model object in the RunInference API.",
35 | )
36 | tf_model_uri: str = Field(None, description="TF model uri from https://tfhub.dev/")
37 | device: str = Field("CPU", description="Device to be used on the Runner. Choices are (CPU, GPU)")
38 | min_batch_size: int = 10
39 | max_batch_size: int = 100
40 |
41 | @root_validator
42 | def validate_fields(cls, values):
43 | v = values.get("model_state_dict_path")
44 | if v and values.get("tf_model_uri"):
45 | raise ValueError("Cannot specify both model_state_dict_path and tf_model_uri")
46 | if v is None and values.get("tf_model_uri") is None:
47 | raise ValueError("At least one of model_state_dict_path or tf_model_uri must be specified")
48 | if v and values.get("model_class_name") is None:
49 | raise ValueError("model_class_name must be specified when using model_state_dict_path")
50 | if v and values.get("model_params") is None:
51 | raise ValueError("model_params must be specified when using model_state_dict_path")
52 | return values
53 |
54 |
55 | def _validate_topic_path(topic_path):
56 | pattern = r"projects/.+/topics/.+"
57 | return bool(re.match(pattern, topic_path))
58 |
59 |
60 | class SourceConfig(BaseModel):
61 | input: str = Field(..., description="the input path to a text file or a Pub/Sub topic")
62 | images_dir: str = Field(
63 | None,
64 | description="Path to the directory where images are stored."
65 | "Not required if image names in the input file have absolute path.",
66 | )
67 | streaming: bool = False
68 |
69 | @validator("streaming", pre=True, always=True)
70 | def set_streaming(cls, v, values):
71 | return _validate_topic_path(values["input"])
72 |
73 |
74 | class SinkConfig(BaseModel):
75 | output: str = Field(..., description="the output path to save results as a text file")
76 |
--------------------------------------------------------------------------------
/my_project/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """A pipeline that uses RunInference API to perform image classification."""
16 |
17 | # standard libraries
18 | import io
19 | import os
20 | from typing import Iterable, Iterator, Optional, Tuple, Union
21 |
22 | # third party libraries
23 | import apache_beam as beam
24 | import numpy as np
25 | import torch
26 | import torch.nn as nn
27 | from apache_beam.io.filesystems import FileSystems
28 | from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference
29 | from apache_beam.ml.inference.pytorch_inference import PytorchModelHandlerTensor
30 | from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerTensor
31 | from PIL import Image
32 | from torchvision import models, transforms
33 |
34 | # Dataflow ML libraries
35 | from my_project.config import ModelConfig, ModelName, SinkConfig, SourceConfig
36 |
37 | import tensorflow as tf # isort:skip
38 |
39 |
40 | def get_model_class(model_name: ModelName) -> nn.Module:
41 | model_dict = {ModelName.RESNET101: models.resnet101, ModelName.MOBILENET_V2: models.mobilenet_v2}
42 |
43 | model_class = model_dict.get(model_name)
44 | if not model_class:
45 | raise ValueError(f"cannot recognize the model {model_name}")
46 | return model_class
47 |
48 |
49 | def read_image(image_file_name: Union[str, bytes], path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]:
50 | if isinstance(image_file_name, bytes):
51 | image_file_name = image_file_name.decode()
52 | if path_to_dir is not None:
53 | image_file_name = os.path.join(path_to_dir, image_file_name)
54 | with FileSystems().open(image_file_name, "r") as file:
55 | data = Image.open(io.BytesIO(file.read())).convert("RGB")
56 | return image_file_name, data
57 |
58 |
59 | def preprocess_image(data: Image.Image) -> torch.Tensor:
60 | image_size = (224, 224)
61 | # Pre-trained PyTorch models expect input images normalized with the
62 | # below values (see: https://pytorch.org/vision/stable/models.html)
63 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
64 | transform = transforms.Compose(
65 | [
66 | transforms.Resize(image_size),
67 | transforms.ToTensor(),
68 | normalize,
69 | ]
70 | )
71 | return transform(data)
72 |
73 |
74 | def preprocess_image_for_tf(data: Image.Image) -> tf.Tensor:
75 | # Convert the input image to the type and dimensions required by the model.
76 |
77 | img = data.resize((224, 224))
78 | img = np.array(img) / 255.0
79 |
80 | return tf.cast(tf.convert_to_tensor(img[...]), dtype=tf.float32)
81 |
82 |
83 | def filter_empty_lines(text: str) -> Iterator[str]:
84 | if len(text.strip()) > 0:
85 | yield text
86 |
87 |
88 | class PostProcessor(beam.DoFn):
89 | def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
90 | filename, prediction_result = element
91 | if isinstance(prediction_result.inference, torch.Tensor):
92 | prediction = torch.argmax(prediction_result.inference, dim=0)
93 | else:
94 | prediction = np.argmax(prediction_result.inference)
95 | yield filename + "," + str(prediction.item())
96 |
97 |
98 | def build_pipeline(pipeline, source_config: SourceConfig, sink_config: SinkConfig, model_config: ModelConfig) -> None:
99 | """
100 | Args:
101 | pipeline: a given input pipeline
102 | source_config: a source config
103 | sink_config: a sink config
104 | model_config: a model config used to instantiate PytorchModelHandlerTensor or TFModelHandlerTensor
105 | """
106 |
107 | # In this example we pass keyed inputs to RunInference transform.
108 | # Therefore, we use KeyedModelHandler wrapper over PytorchModelHandler or TFModelHandlerTensor.
109 | if model_config.model_state_dict_path:
110 | model_handler = KeyedModelHandler(
111 | PytorchModelHandlerTensor(
112 | state_dict_path=model_config.model_state_dict_path,
113 | model_class=get_model_class(model_config.model_class_name),
114 | model_params=model_config.model_params,
115 | device=model_config.device,
116 | min_batch_size=model_config.min_batch_size,
117 | max_batch_size=model_config.max_batch_size,
118 | )
119 | )
120 | elif model_config.tf_model_uri:
121 | model_handler = KeyedModelHandler(
122 | TFModelHandlerTensor(
123 | model_uri=model_config.tf_model_uri,
124 | device=model_config.device,
125 | min_batch_size=model_config.min_batch_size,
126 | max_batch_size=model_config.max_batch_size,
127 | )
128 | )
129 | else:
130 | raise ValueError("Only PytorchModelHandlerTensor and TFModelHandlerTensor are supported!")
131 |
132 | if source_config.streaming:
133 | # read the text file path from Pub/Sub and use FixedWindows to group these images
134 | # and then run the model inference and store the results into GCS
135 | filename_value_pair = (
136 | pipeline
137 | | "ReadImageNamesFromPubSub" >> beam.io.ReadFromPubSub(topic=source_config.input)
138 | | "Window into fixed intervals" >> beam.WindowInto(beam.window.FixedWindows(60 * 5))
139 | | "ReadImageData" >> beam.Map(lambda image_name: read_image(image_file_name=image_name))
140 | )
141 | else:
142 | # read the text file and create the pair of input data with the file name and its image
143 | filename_value_pair = (
144 | pipeline
145 | | "ReadImageNames" >> beam.io.ReadFromText(source_config.input)
146 | | "FilterEmptyLines" >> beam.ParDo(filter_empty_lines)
147 | | "ReadImageData"
148 | >> beam.Map(lambda image_name: read_image(image_file_name=image_name, path_to_dir=source_config.images_dir))
149 | )
150 |
151 | if model_config.model_state_dict_path:
152 | filename_value_pair = filename_value_pair | "PreprocessImages" >> beam.MapTuple(
153 | lambda file_name, data: (file_name, preprocess_image(data))
154 | )
155 | else:
156 | filename_value_pair = filename_value_pair | "PreprocessImages_TF" >> beam.MapTuple(
157 | lambda file_name, data: (file_name, preprocess_image_for_tf(data))
158 | )
159 |
160 | # do the model inference and postprocessing
161 | predictions = (
162 | filename_value_pair
163 | | "RunInference" >> RunInference(model_handler)
164 | | "ProcessOutput" >> beam.ParDo(PostProcessor())
165 | )
166 |
167 | # combine all the window results into one text for GCS
168 | if source_config.streaming:
169 | (
170 | predictions
171 | | "WriteOutputToGCS"
172 | >> beam.io.fileio.WriteToFiles(sink_config.output, shards=0) # pylint: disable=expression-not-assigned
173 | )
174 | else:
175 | # save the predictions to a text file
176 | predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned
177 | sink_config.output, shard_name_template="", append_trailing_newlines=True
178 | )
179 |
--------------------------------------------------------------------------------
/my_project/run.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """A run module that runs a Beam pipeline to perform image classification."""
16 |
17 | # standard libraries
18 | import argparse
19 | import logging
20 |
21 | # third party libraries
22 | import apache_beam as beam
23 | from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
24 | from apache_beam.runners.runner import PipelineResult
25 |
26 | # Dataflow ML libraries
27 | from my_project.config import ModelConfig, SinkConfig, SourceConfig
28 | from my_project.pipeline import build_pipeline
29 |
30 |
31 | def parse_known_args(argv):
32 | """Parses args for the workflow."""
33 | parser = argparse.ArgumentParser()
34 | parser.add_argument("--input", dest="input", required=True, help="Path to the text file containing image names.")
35 | parser.add_argument(
36 | "--output", dest="output", required=True, help="Path where to save the output predictions text file."
37 | )
38 | parser.add_argument(
39 | "--model_state_dict_path", dest="model_state_dict_path", required=False, help="Path to the model's state_dict."
40 | )
41 | parser.add_argument("--model_name", dest="model_name", required=False, help="model name, e.g. resnet101")
42 | parser.add_argument(
43 | "--tf_model_uri", dest="tf_model_uri", required=False, help="tfhub model URI from https://tfhub.dev/"
44 | )
45 | parser.add_argument(
46 | "--images_dir",
47 | default=None,
48 | help="Path to the directory where images are stored."
49 | "Not required if image names in the input file have absolute path.",
50 | )
51 | parser.add_argument(
52 | "--device",
53 | default="CPU",
54 | help="Device to be used on the Runner. Choices are (CPU, GPU).",
55 | )
56 | return parser.parse_known_args(argv)
57 |
58 |
59 | def run(argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult:
60 | """
61 | Args:
62 | argv: Command line arguments defined for this example.
63 | save_main_session: Used for internal testing.
64 | test_pipeline: Used for internal testing.
65 | """
66 | known_args, pipeline_args = parse_known_args(argv)
67 |
68 | # setup configs
69 | model_config = ModelConfig(
70 | model_state_dict_path=known_args.model_state_dict_path,
71 | model_class_name=known_args.model_name,
72 | model_params={"num_classes": 1000},
73 | tf_model_uri=known_args.tf_model_uri,
74 | device=known_args.device,
75 | )
76 |
77 | source_config = SourceConfig(input=known_args.input)
78 | sink_config = SinkConfig(output=known_args.output)
79 |
80 | # setup pipeline
81 | pipeline_options = PipelineOptions(pipeline_args, streaming=source_config.streaming)
82 | pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
83 |
84 | pipeline = test_pipeline
85 | if not test_pipeline:
86 | pipeline = beam.Pipeline(options=pipeline_options)
87 |
88 | # build the pipeline using configs
89 | build_pipeline(pipeline, source_config=source_config, sink_config=sink_config, model_config=model_config)
90 |
91 | # run it
92 | result = pipeline.run()
93 | result.wait_until_finish()
94 | return result
95 |
96 |
97 | if __name__ == "__main__":
98 | logging.getLogger().setLevel(logging.INFO)
99 | run()
100 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | [tool.black]
16 | line-length = 120
17 | include = '\.pyi?$'
18 | exclude = '''
19 |
20 | (
21 | /(
22 | \.eggs # exclude a few common directories in the
23 | | \.git # root of the project
24 | | \.hg
25 | | \.mypy_cache
26 | | \.tox
27 | | \.vscode
28 | | \.idea
29 | | \.ipynb_checkpoints
30 | | \.dvc
31 | | _build
32 | | buck-out
33 | | build
34 | | dist
35 | | venv
36 | | node_modules
37 | )/
38 | | version.py # also separately exclude a file named foo.py in
39 | # the root of the project
40 | )
41 | '''
--------------------------------------------------------------------------------
/pytorch_gpu.Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This uses Ubuntu with Python 3.10
16 | ARG PYTORCH_SERVING_BUILD_IMAGE=pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
17 |
18 | FROM ${PYTORCH_SERVING_BUILD_IMAGE}
19 |
20 | WORKDIR /workspace
21 |
22 | COPY requirements.txt requirements.txt
23 |
24 | RUN pip install --upgrade pip \
25 | && pip install --no-cache-dir -r requirements.txt \
26 | && rm -f requirements.txt
27 |
28 | # Copy files from official SDK image, including script/dependencies.
29 | COPY --from=apache/beam_python3.10_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam
30 |
31 | # Set the entrypoint to Apache Beam SDK launcher.
32 | ENTRYPOINT ["/opt/apache/beam/boot"]
--------------------------------------------------------------------------------
/requirements.dev.txt:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # building
17 | setuptools>=67.7.2
18 |
19 | # format/lint
20 | flake8>=5.0.4
21 | isort>=5.6.4
22 | pre-commit>=2.9.3
23 | black>=22.3.0
24 |
25 | # test
26 | pytest>=6.2.1
27 | pytest-cov>=2.10.1
28 | pytest-ordering
29 | pytest-env
30 |
--------------------------------------------------------------------------------
/requirements.prod.txt:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | apache-beam[gcp]==${BEAM_VERSION}
16 | pydantic<2.0.0
17 | torch>=1.7.1
18 | torchvision>=0.8.2
19 | pillow>=8.0.0
20 | tensorflow
21 | tensorflow_hub
22 | numpy<2.0.0
23 | pyOpenSSL
24 |
--------------------------------------------------------------------------------
/scripts/check-beam.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Import environment variables from .env file.
18 | source .env
19 |
20 | # Check that the required environment variables are set.
21 | if [ -z "${PROJECT_ID}" ]; then
22 | echo "The PROJECT_ID environment variable is not set."
23 | exit 1
24 | fi
25 |
26 | if [ -z "${ZONE}" ]; then
27 | echo "The ZONE environment variable is not set."
28 | exit 1
29 | fi
30 |
31 | if [ -z "${VM_NAME}" ]; then
32 | echo "The VM_NAME environment variable is not set."
33 | exit 1
34 | fi
35 |
36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then
37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set."
38 | exit 1
39 | fi
40 |
41 | echo "Checking Python version on VM..."
42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \
43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \
46 | \"python --version\""
47 |
48 | echo "Checking venv exists on VM..."
49 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \
50 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
51 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
52 | --privileged $CUSTOM_CONTAINER_IMAGE -c \
53 | 'apt list --installed | grep python3-venv'"
54 |
55 | echo "Checking Beam Version on VM..."
56 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \
57 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
58 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
59 | --privileged $CUSTOM_CONTAINER_IMAGE -c \
60 | \"python -c 'import apache_beam as beam; print(beam.__version__)'\""
--------------------------------------------------------------------------------
/scripts/check-pipeline.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Import environment variables from .env file.
18 | source .env
19 |
20 | # Check that the required environment variables are set.
21 | if [ -z "${PROJECT_ID}" ]; then
22 | echo "The PROJECT_ID environment variable is not set."
23 | exit 1
24 | fi
25 |
26 | if [ -z "${ZONE}" ]; then
27 | echo "The ZONE environment variable is not set."
28 | exit 1
29 | fi
30 |
31 | if [ -z "${VM_NAME}" ]; then
32 | echo "The VM_NAME environment variable is not set."
33 | exit 1
34 | fi
35 |
36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then
37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set."
38 | exit 1
39 | fi
40 |
41 | vm_ssh="gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command"
42 | vm_scp="gcloud compute scp --strict-host-key-checking=no --project $PROJECT_ID --zone=$ZONE --quiet"
43 |
44 | # Package the local code and copy it to VM
45 | PACKAGE_NAME="my_project-0.0.1"
46 | python3 setup.py sdist
47 | $vm_ssh "sudo rm -fr ~/*"
48 | $vm_scp dist/$PACKAGE_NAME.tar.gz data/openimage_10.txt $VM_NAME:~/
49 | $vm_ssh "tar zxvf $PACKAGE_NAME.tar.gz; mv openimage_10.txt $PACKAGE_NAME"
50 |
51 | # Test the model on GPUs
52 | if [ -z "${TF_MODEL_URI}" ]; then
53 | echo "Running the PyTorch model on GPU..."
54 | $vm_ssh "docker run --entrypoint /bin/bash \
55 | --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
56 | --volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \
57 | \"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --model_state_dict_path $MODEL_STATE_DICT_PATH --model_name $MODEL_NAME --device GPU\""
58 | else
59 | echo "Running the Tensorflow model on GPU..."
60 | $vm_ssh "docker run --entrypoint /bin/bash \
61 | --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
62 | --volume /home/\$USER/:/workspace/\$USER --privileged $CUSTOM_CONTAINER_IMAGE -c \
63 | \"cd \$USER/$PACKAGE_NAME; python -m my_project.run --input openimage_10.txt --output beam-output/beam_test_out.txt --tf_model_uri $TF_MODEL_URI --device GPU\""
64 | fi
65 |
66 | $vm_ssh "[ -f './$PACKAGE_NAME/beam-output/beam_test_out.txt' ] && echo 'The DirectRunner run succeeded on GPU!' || echo 'The DirectRunner run failed on GPU!'"
--------------------------------------------------------------------------------
/scripts/check-tf-on-gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Import environment variables from .env file.
18 | source .env
19 |
20 | # Check that the required environment variables are set.
21 | if [ -z "${PROJECT_ID}" ]; then
22 | echo "The PROJECT_ID environment variable is not set."
23 | exit 1
24 | fi
25 |
26 | if [ -z "${ZONE}" ]; then
27 | echo "The ZONE environment variable is not set."
28 | exit 1
29 | fi
30 |
31 | if [ -z "${VM_NAME}" ]; then
32 | echo "The VM_NAME environment variable is not set."
33 | exit 1
34 | fi
35 |
36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then
37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set."
38 | exit 1
39 | fi
40 |
41 | echo "Checking Tensorflow on GPU..."
42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \
43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \
46 | \"python -c 'import tensorflow as tf; print(tf.config.list_physical_devices())'\""
--------------------------------------------------------------------------------
/scripts/check-torch-on-gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Import environment variables from .env file.
18 | source .env
19 |
20 | # Check that the required environment variables are set.
21 | if [ -z "${PROJECT_ID}" ]; then
22 | echo "The PROJECT_ID environment variable is not set."
23 | exit 1
24 | fi
25 |
26 | if [ -z "${ZONE}" ]; then
27 | echo "The ZONE environment variable is not set."
28 | exit 1
29 | fi
30 |
31 | if [ -z "${VM_NAME}" ]; then
32 | echo "The VM_NAME environment variable is not set."
33 | exit 1
34 | fi
35 |
36 | if [ -z "${CUSTOM_CONTAINER_IMAGE}" ]; then
37 | echo "The CUSTOM_CONTAINER_IMAGE environment variable is not set."
38 | exit 1
39 | fi
40 |
41 | echo "Checking PyTorch on GPU..."
42 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command \
43 | "docker run --entrypoint /bin/bash --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
44 | --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
45 | --privileged $CUSTOM_CONTAINER_IMAGE -c \
46 | \"python -c 'import torch; print(torch.cuda.is_available()); print(torch.cuda.get_device_name())'\""
--------------------------------------------------------------------------------
/scripts/create-gpu-vm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Import environment variables from .env file.
18 | source .env
19 |
20 | # Check that the required environment variables are set.
21 | if [ -z "${PROJECT_ID}" ]; then
22 | echo "The PROJECT_ID environment variable is not set."
23 | exit 1
24 | fi
25 |
26 | if [ -z "${ZONE}" ]; then
27 | echo "The ZONE environment variable is not set."
28 | exit 1
29 | fi
30 |
31 | if [ -z "${VM_NAME}" ]; then
32 | echo "The VM_NAME environment variable is not set."
33 | exit 1
34 | fi
35 |
36 | if [ -z "${MACHINE_TYPE}" ]; then
37 | echo "The MACHINE_TYPE environment variable is not set."
38 | exit 1
39 | fi
40 |
41 | # Set the number of GPUs to attach to the VM.
42 | GPU_COUNT=1
43 | GPU_TYPE="nvidia-tesla-t4"
44 |
45 | # Create the VM.
46 | echo "Waiting for VM to be created (this will take a few minutes)..."
47 |
48 | gcloud compute instances create $VM_NAME \
49 | --project $PROJECT_ID \
50 | --zone $ZONE \
51 | --machine-type $MACHINE_TYPE \
52 | --accelerator count=$GPU_COUNT,type=$GPU_TYPE \
53 | --image-family cos-stable \
54 | --image-project=cos-cloud \
55 | --maintenance-policy TERMINATE \
56 | --restart-on-failure \
57 | --boot-disk-size=200G \
58 | --scopes=cloud-platform
59 |
60 | # Wait for the VM to be created.
61 | STATUS=""
62 | while [ "$STATUS" != "RUNNING" ]; do
63 | sleep 5
64 | STATUS=$(gcloud compute instances describe $VM_NAME --project $PROJECT_ID --zone=$ZONE --format="value(status)")
65 | done
66 |
67 | echo "VM $VM_NAME is now running."
68 |
69 | # Print the VM's IP address.
70 | echo "VM IP address: $(gcloud compute instances describe $VM_NAME --project $PROJECT_ID --zone=$ZONE --format='value(networkInterfaces[0].accessConfigs[0].natIP)')"
71 |
72 | # Install GPU driver
73 | echo "Installing Nvidia GPU driver..."
74 | gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --tunnel-through-iap --quiet \
75 | --command "cos-extensions install gpu && sudo mount --bind /var/lib/nvidia /var/lib/nvidia && sudo mount -o remount,exec /var/lib/nvidia"
76 |
77 | vm_ssh="gcloud compute ssh --strict-host-key-checking=no $VM_NAME --project $PROJECT_ID --zone=$ZONE --quiet --command"
78 |
79 | echo "Getting the GPU driver information..."
80 | $vm_ssh "/var/lib/nvidia/bin/nvidia-smi"
81 |
82 | # docker-credential-gcr
83 | if [[ -n "$DOCKER_CREDENTIAL_REGISTRIES" ]]; then
84 | echo "HOME is defined."
85 | echo "Authenticating us-docker.pkg.dev..."
86 | $vm_ssh "docker-credential-gcr configure-docker --registries=$DOCKER_CREDENTIAL_REGISTRIES"
87 | fi
--------------------------------------------------------------------------------
/scripts/get_beam_version.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # third party libraries
16 | import requests
17 | from packaging.version import Version
18 |
19 |
20 | def beam_versions(package_name, limit_releases=10):
21 | url = f"https://pypi.org/pypi/{package_name}/json"
22 | data = requests.get(url).json()
23 | versions = list(data["releases"].keys())
24 | versions.sort(key=Version, reverse=True)
25 | return versions[:limit_releases]
26 |
27 |
28 | print("\n".join(beam_versions("apache-beam", 1)))
29 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # standard libraries
16 | import os
17 |
18 | # third party libraries
19 | import setuptools
20 |
21 | required = []
22 | if os.path.exists("requirements.txt"):
23 | with open("requirements.txt") as f:
24 | required = f.read().splitlines()
25 |
26 | setuptools.setup(
27 | name="my_project",
28 | version="0.0.1",
29 | install_requires=required,
30 | packages=["my_project"],
31 | )
32 |
--------------------------------------------------------------------------------
/tensor_rt.Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This needs Python 3.8 for your local runtime environment
16 | ARG PYTORCH_SERVING_BUILD_IMAGE=nvcr.io/nvidia/pytorch:22.11-py3
17 |
18 | FROM ${PYTORCH_SERVING_BUILD_IMAGE}
19 |
20 | ENV PATH="/usr/src/tensorrt/bin:${PATH}"
21 |
22 | WORKDIR /workspace
23 |
24 | COPY requirements.txt requirements.txt
25 |
26 | ENV DEBIAN_FRONTEND=noninteractive
27 |
28 | RUN apt-get update \
29 | && apt-get install -y python3.8 python3.8-venv python3-venv \
30 | && pip install --upgrade pip \
31 | && apt-get install ffmpeg libsm6 libxext6 -y --no-install-recommends \
32 | && pip install cuda-python onnx numpy onnxruntime common \
33 | && pip install git+https://github.com/facebookresearch/detectron2.git@5aeb252b194b93dc2879b4ac34bc51a31b5aee13 \
34 | && pip install git+https://github.com/NVIDIA/TensorRT#subdirectory=tools/onnx-graphsurgeon
35 |
36 | RUN pip install --no-cache-dir -r requirements.txt && rm -f requirements.txt
37 |
38 | # Copy files from official SDK image, including script/dependencies.
39 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam
40 |
41 | # Set the entrypoint to Apache Beam SDK launcher.
42 | ENTRYPOINT ["/opt/apache/beam/boot"]
--------------------------------------------------------------------------------
/tensorflow_gpu.Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This needs Python 3.8 for your local runtime environment
16 |
17 | # Select an NVIDIA base image with desired GPU stack from https://ngc.nvidia.com/catalog/containers/nvidia:cuda
18 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
19 |
20 | WORKDIR /workspace
21 |
22 | COPY requirements.txt requirements.txt
23 |
24 | RUN \
25 | # Add Deadsnakes repository that has a variety of Python packages for Ubuntu.
26 | # See: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa
27 | apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 \
28 | && echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \
29 | && echo "deb-src http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \
30 | && apt-get update \
31 | && apt-get install -y curl \
32 | python3.8 \
33 | python3.8-venv \
34 | python3-venv \
35 | # With python3.8 package, distutils need to be installed separately.
36 | python3-distutils \
37 | && rm -rf /var/lib/apt/lists/* \
38 | && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \
39 | && curl https://bootstrap.pypa.io/pip/3.8/get-pip.py | python \
40 | && pip install --upgrade pip \
41 | && pip install --no-cache-dir -r requirements.txt \
42 | && pip install --no-cache-dir tensorflow==2.12.1 \
43 | && pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118
44 |
45 | # Copy files from official SDK image, including script/dependencies.
46 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam
47 |
48 | # Set the entrypoint to Apache Beam SDK launcher.
49 | ENTRYPOINT ["/opt/apache/beam/boot"]
--------------------------------------------------------------------------------
/tensorflow_gpu.flex.Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This needs Python 3.8 for your local runtime environment
16 |
17 | FROM gcr.io/dataflow-templates-base/flex-template-launcher-image:latest as template_launcher
18 |
19 | # Select an NVIDIA base image with desired GPU stack from https://ngc.nvidia.com/catalog/containers/nvidia:cuda
20 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
21 |
22 | WORKDIR /workspace
23 |
24 | COPY requirements.txt requirements.txt
25 |
26 | RUN \
27 | # Add Deadsnakes repository that has a variety of Python packages for Ubuntu.
28 | # See: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa
29 | apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 \
30 | && echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \
31 | && echo "deb-src http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" >> /etc/apt/sources.list.d/custom.list \
32 | && apt-get update \
33 | && apt-get install -y curl \
34 | python3.8 \
35 | python3.8-venv \
36 | python3-venv \
37 | # With python3.8 package, distutils need to be installed separately.
38 | python3-distutils \
39 | && rm -rf /var/lib/apt/lists/* \
40 | && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \
41 | && curl https://bootstrap.pypa.io/pip/3.8/get-pip.py | python \
42 | && pip install --upgrade pip \
43 | && pip install --no-cache-dir -r requirements.txt \
44 | && pip install --no-cache-dir tensorflow==2.12.1 \
45 | && pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118
46 |
47 | # Copy the run module
48 | COPY my_project/ /workspace/my_project
49 | RUN rm -fr /workspace/my_project/__pycache__
50 |
51 | # Specifies which Python file to run to launch the Flex Template.
52 | ENV FLEX_TEMPLATE_PYTHON_PY_FILE="my_project/run.py"
53 |
54 | # Since we already downloaded all the dependencies, there's no need to rebuild everything.
55 | ENV PIP_NO_DEPS=True
56 |
57 | ENV PYTHONPATH "${PYTHONPATH}:/workspace/my_project/"
58 |
59 | # Copy the Dataflow Template launcher
60 | COPY --from=template_launcher /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher
61 |
62 | # Copy files from official SDK image, including script/dependencies.
63 | # Note Python 3.8 is used since the above setup uses Python 3.8.
64 | COPY --from=apache/beam_python3.8_sdk:${BEAM_VERSION} /opt/apache/beam /opt/apache/beam
65 |
66 | # Set the entrypoint to the Dataflow Template launcher
67 | # Use this if the launcher image is different with the custom container image
68 | # ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"]
69 |
70 | # Set the entrypoint to Apache Beam SDK launcher.
71 | ENTRYPOINT ["/opt/apache/beam/boot"]
--------------------------------------------------------------------------------
/tests/sample.env.pytorch:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ### PYTHON SDK SETTINGS
3 | ################################################################################
4 | PYTHON_VERSION=3.10
5 | BEAM_VERSION=2.60.0
6 | DOCKERFILE_TEMPLATE=pytorch_gpu.Dockerfile
7 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev"
8 | ################################################################################
9 | ### GCP SETTINGS
10 | ################################################################################
11 | PROJECT_ID=apache-beam-testing
12 | REGION=us-central1
13 | ZONE=us-central1-f
14 | DISK_SIZE_GB=50
15 | MACHINE_TYPE=n1-standard-2
16 | VM_NAME=beam-ml-starter-gpu
17 | ################################################################################
18 | ### DATAFLOW JOB SETTINGS
19 | ################################################################################
20 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests
21 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests
22 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/pytorch_gpu:test
23 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver"
24 | ################################################################################
25 | ### DATAFLOW JOB MODEL SETTINGS
26 | ################################################################################
27 | MODEL_STATE_DICT_PATH="gs://apache-beam-ml/models/torchvision.models.mobilenet_v2.pth"
28 | MODEL_NAME=mobilenet_v2
29 | ################################################################################
30 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS
31 | ################################################################################
32 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt"
33 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/temp-storage-for-end-to-end-tests/dataflow-ml-starter/result_gpu.txt"
--------------------------------------------------------------------------------
/tests/sample.env.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ### PYTHON SDK SETTINGS
3 | ################################################################################
4 | PYTHON_VERSION=3.8
5 | BEAM_VERSION=2.48.0
6 | DOCKERFILE_TEMPLATE=tensorflow_gpu.Dockerfile
7 | DOCKER_CREDENTIAL_REGISTRIES="us-docker.pkg.dev"
8 | ################################################################################
9 | ### GCP SETTINGS
10 | ################################################################################
11 | PROJECT_ID=apache-beam-testing
12 | REGION=us-central1
13 | ZONE=us-central1-f
14 | DISK_SIZE_GB=50
15 | MACHINE_TYPE=n1-standard-2
16 | VM_NAME=beam-ml-starter-gpu
17 | ################################################################################
18 | ### DATAFLOW JOB SETTINGS
19 | ################################################################################
20 | STAGING_LOCATION=gs://temp-storage-for-perf-tests/loadtests
21 | TEMP_LOCATION=gs://temp-storage-for-perf-tests/loadtests
22 | CUSTOM_CONTAINER_IMAGE=us-docker.pkg.dev/apache-beam-testing/dataflow-ml-starter/tf_gpu:test
23 | SERVICE_OPTIONS="worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver"
24 | ################################################################################
25 | ### DATAFLOW JOB MODEL SETTINGS
26 | ################################################################################
27 | #TF_MODEL_URI: only support TF2 models (https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2)
28 | TF_MODEL_URI=https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4
29 | ################################################################################
30 | ### DATAFLOW JOB INPUT&OUTPUT SETTINGS
31 | ################################################################################
32 | INPUT_DATA="gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt"
33 | OUTPUT_DATA="gs://temp-storage-for-end-to-end-tests/temp-storage-for-end-to-end-tests/dataflow-ml-starter/result_gpu.txt"
--------------------------------------------------------------------------------
/tests/test_pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # standard libraries
16 | from pathlib import Path
17 |
18 | # third party libraries
19 | import apache_beam as beam
20 |
21 | # Dataflow ML libraries
23 | from my_project.config import ModelConfig, SinkConfig, SourceConfig
24 | from my_project.pipeline import build_pipeline
25 |
26 | DATA_FILE_PATH = Path(__file__).parent.parent / "data"
27 |
28 |
29 | def test_build_pipeline():
30 | model_config = ModelConfig(
31 | model_state_dict_path="gs://apache-beam-ml/models/torchvision.models.resnet101.pth",
32 | model_class_name="resnet101",
33 | model_params={"num_classes": 1000},
34 | )
35 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt"))
36 | sink_config = SinkConfig(output="beam-output/my_output.txt")
37 |
38 | p = beam.Pipeline()
39 | build_pipeline(p, source_config=source_config, sink_config=sink_config, model_config=model_config)
40 |
41 |
42 | def test_build_pipeline_with_tf():
43 | model_config = ModelConfig(
44 | tf_model_uri="https://tfhub.dev/google/imagenet/mobilenet_v1_075_192/quantops/classification/3",
45 | )
46 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt"))
47 | sink_config = SinkConfig(output="beam-output/my_output.txt")
48 |
49 | p = beam.Pipeline()
50 | build_pipeline(p, source_config=source_config, sink_config=sink_config, model_config=model_config)
51 |
52 |
53 | def test_source_config_streaming():
54 | source_config = SourceConfig(input=str(DATA_FILE_PATH / "openimage_10.txt"))
55 | assert source_config.streaming is False
56 | source_config = SourceConfig(input="projects/apache-beam-testing/topics/Imagenet_openimage_50k_benchmark")
57 | assert source_config.streaming is True
58 |
--------------------------------------------------------------------------------