├── .github ├── conventional-commit-lint.yaml ├── release-please.yml ├── renovate.json ├── trusted-contribution.yml └── workflows │ ├── lint.yaml │ └── stale.yml ├── .gitignore ├── .kitchen.yml ├── CHANGELOG.md ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── build ├── int.cloudbuild.yaml └── lint.cloudbuild.yaml ├── doc └── upgrading_to_v3.0.md ├── examples ├── dlp_api_example │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ └── variables.tf └── simple_example │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ └── variables.tf ├── metadata.display.yaml ├── metadata.yaml ├── modules ├── dataflow_bucket │ ├── README.md │ ├── main.tf │ ├── metadata.display.yaml │ ├── metadata.yaml │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── flex │ ├── README.md │ ├── main.tf │ ├── metadata.display.yaml │ ├── metadata.yaml │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf └── legacy │ ├── README.md │ ├── main.tf │ ├── metadata.display.yaml │ ├── metadata.yaml │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf └── test ├── .gitignore ├── fixtures └── simple_example │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── integration └── simple_example │ ├── controls │ └── gcloud.rb │ └── inspec.yml └── setup ├── .gitignore ├── iam.tf ├── main.tf ├── outputs.tf ├── variables.tf └── versions.tf /.github/conventional-commit-lint.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is automatically generated from: 16 | # https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/main/infra/terraform/test-org/github 17 | 18 | enabled: true 19 | always_check_pr_title: true 20 | -------------------------------------------------------------------------------- /.github/release-please.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | releaseType: terraform-module 16 | handleGHRelease: true 17 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["github>GoogleCloudPlatform/cloud-foundation-toolkit//infra/terraform/test-org/github/resources/renovate"] 4 | } 5 | -------------------------------------------------------------------------------- /.github/trusted-contribution.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is automatically generated from: 16 | # https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/main/infra/terraform/test-org/github 17 | 18 | annotations: 19 | - type: comment 20 | text: "/gcbrun" 21 | trustedContributors: 22 | - release-please[bot] 23 | - renovate[bot] 24 | - renovate-bot 25 | - forking-renovate[bot] 26 | - dependabot[bot] 27 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is automatically generated from values at: 16 | # https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/main/infra/terraform/test-org/org/locals.tf 17 | 18 | name: 'lint' 19 | 20 | on: 21 | workflow_dispatch: 22 | pull_request: 23 | branches: 24 | - main 25 | 26 | concurrency: 27 | group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}' 28 | cancel-in-progress: true 29 | 30 | jobs: 31 | lint: 32 | name: 'lint' 33 | runs-on: 'ubuntu-latest' 34 | steps: 35 | - uses: 'actions/checkout@v4' 36 | - id: variables 37 | run: | 38 | MAKEFILE=$(find . 
-name Makefile -print -quit) 39 | if [ -z "$MAKEFILE" ]; then 40 | echo dev-tools=gcr.io/cloud-foundation-cicd/cft/developer-tools:1 >> "$GITHUB_OUTPUT" 41 | else 42 | VERSION=$(grep "DOCKER_TAG_VERSION_DEVELOPER_TOOLS := " $MAKEFILE | cut -d\ -f3) 43 | IMAGE=$(grep "DOCKER_IMAGE_DEVELOPER_TOOLS := " $MAKEFILE | cut -d\ -f3) 44 | REGISTRY=$(grep "REGISTRY_URL := " $MAKEFILE | cut -d\ -f3) 45 | echo dev-tools=${REGISTRY}/${IMAGE}:${VERSION} >> "$GITHUB_OUTPUT" 46 | fi 47 | - run: docker run --rm -e ENABLE_BPMETADATA -v ${{ github.workspace }}:/workspace ${{ steps.variables.outputs.dev-tools }} module-swapper 48 | env: 49 | ENABLE_BPMETADATA: 1 50 | 51 | - run: docker run --rm -e ENABLE_BPMETADATA -v ${{ github.workspace }}:/workspace ${{ steps.variables.outputs.dev-tools }} /usr/local/bin/test_lint.sh 52 | env: 53 | ENABLE_BPMETADATA: 1 54 | 55 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is automatically generated from: 16 | # https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/main/infra/terraform/test-org/github 17 | 18 | name: "Close stale issues" 19 | on: 20 | schedule: 21 | - cron: "0 23 * * *" 22 | 23 | jobs: 24 | stale: 25 | if: github.repository_owner == 'GoogleCloudPlatform' || github.repository_owner == 'terraform-google-modules' 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/stale@v9 29 | with: 30 | repo-token: ${{ secrets.GITHUB_TOKEN }} 31 | stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 7 days' 32 | stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. 
Remove stale label or comment or this will be closed in 7 days' 33 | exempt-issue-labels: 'triaged' 34 | exempt-pr-labels: 'dependencies,autorelease: pending' 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX leaves these everywhere on SMB shares 2 | ._* 3 | 4 | # OSX trash 5 | .DS_Store 6 | 7 | # Python 8 | *.pyc 9 | 10 | # Emacs save files 11 | *~ 12 | \#*\# 13 | .\#* 14 | 15 | # Vim-related files 16 | [._]*.s[a-w][a-z] 17 | [._]s[a-w][a-z] 18 | *.un~ 19 | Session.vim 20 | .netrwhist 21 | 22 | ### https://raw.github.com/github/gitignore/90f149de451a5433aebd94d02d11b0e28843a1af/Terraform.gitignore 23 | 24 | # Local .terraform directories 25 | **/.terraform/* 26 | 27 | # .tfstate files 28 | *.tfstate 29 | *.tfstate.* 30 | 31 | # Crash log files 32 | crash.log 33 | 34 | # Kitchen files 35 | **/inspec.lock 36 | **/.kitchen 37 | **/kitchen.local.yml 38 | **/Gemfile.lock 39 | 40 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 41 | # .tfvars files are managed as part of configuration and so should be included in 42 | # version control. 43 | **/*.tfvars 44 | 45 | credentials.json 46 | 47 | **/.terraform/ 48 | **/backend.tf 49 | **/values-*.yaml 50 | **/*.tfplan 51 | **/credentials.json 52 | 53 | # tf lock file 54 | .terraform.lock.hcl 55 | -------------------------------------------------------------------------------- /.kitchen.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | --- 16 | driver: 17 | name: "terraform" 18 | command_timeout: 1800 19 | verify_version: false 20 | 21 | provisioner: 22 | name: "terraform" 23 | 24 | platforms: 25 | - name: local 26 | 27 | suites: 28 | - name: "simple_example" 29 | driver: 30 | name: "terraform" 31 | command_timeout: 1800 32 | root_module_directory: test/fixtures/simple_example/ 33 | verifier: 34 | name: terraform 35 | color: false 36 | systems: 37 | - name: simple_example 38 | backend: local 39 | controls: 40 | - gcloud 41 | - gcloud_dataflow 42 | - gsutil 43 | provisioner: 44 | name: terraform 45 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/) and this 6 | project adheres to [Semantic Versioning](http://semver.org/). 
7 | 8 | ## [3.0.2](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v3.0.1...v3.0.2) (2025-03-26) 9 | 10 | 11 | ### Bug Fixes 12 | 13 | * Removing connection and adding alternate defaults ([#95](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/95)) ([76ce39a](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/76ce39a7a048fc4f150bf6bd4c117ef171c00af4)) 14 | 15 | ## [3.0.1](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v3.0.0...v3.0.1) (2025-03-18) 16 | 17 | 18 | ### Bug Fixes 19 | 20 | * add connections metadata with bigquery, pubsub and GCS simple bucket (https://github.com/terraform-google-modules/terraform-google-dataflow/pull/92) ([79a117b](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/79a117b5d8c1eaae79a33287fbf7c779103e0d63)) 21 | 22 | ## [3.0.0](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.5.0...v3.0.0) (2025-02-28) 23 | 24 | 25 | ### ⚠ BREAKING CHANGES 26 | 27 | * Create separate modules for legacy and flex template jobs, and generate metadata. ([#86](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/86)) 28 | 29 | ### Features 30 | 31 | * Create separate modules for legacy and flex template jobs, and generate metadata. ([#86](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/86)) ([22568bf](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/22568bfd2b179075db4788655e450d504fb5be84)) 32 | 33 | ## [2.5.0](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.4.0...v2.5.0) (2024-10-30) 34 | 35 | 36 | ### Features 37 | 38 | * **deps:** Update Terraform google to v6 ([#79](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/79)) ([ec28497](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/ec284970e085e9715a53d962ccd9b924b287c50d)) 39 | 40 | ## [2.4.0](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.3.0...v2.4.0) (2023-12-14) 41 | 42 | 43 | ### Features 44 | 45 | * Add support for additional_experiments dataflow job field ([#64](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/64)) ([5e1c674](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/5e1c674624b660c6d63aa571dfeccafbdfc279f1)) 46 | 47 | ## [2.3.0](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.2.0...v2.3.0) (2023-12-08) 48 | 49 | 50 | ### Features 51 | 52 | * Adds support to labels ([#57](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/57)) ([61341f0](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/61341f0fd2b6d0dc2c381484c1acd947da6de533)) 53 | 54 | 55 | ### Bug Fixes 56 | 57 | * upgraded versions.tf to include minor bumps from tpg v5 ([#60](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/60)) ([b70ddf9](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/b70ddf99fc4a1ee4a86f114a5d4783dc52d911db)) 58 | 59 | ## [2.2.0](https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.1.0...v2.2.0) (2022-02-23) 60 | 61 | 62 | ### Features 63 | 64 | * update TPG version constraints to allow 4.0 ([#34](https://github.com/terraform-google-modules/terraform-google-dataflow/issues/34)) 
([374ab2d](https://github.com/terraform-google-modules/terraform-google-dataflow/commit/374ab2d2c7cd05ce1a156400d8fe6ce48d7a4a91)) 65 | 66 | ## [2.1.0](https://www.github.com/terraform-google-modules/terraform-google-dataflow/compare/v2.0.0...v2.1.0) (2021-07-07) 67 | 68 | 69 | ### Features 70 | 71 | * Add CMEK support ([#28](https://www.github.com/terraform-google-modules/terraform-google-dataflow/issues/28)) ([604207b](https://www.github.com/terraform-google-modules/terraform-google-dataflow/commit/604207be49d1b11a854eed68067979b8148aadd7)) 72 | 73 | ## [2.0.0](https://www.github.com/terraform-google-modules/terraform-google-dataflow/compare/v1.0.0...v2.0.0) (2021-07-01) 74 | 75 | 76 | ### ⚠ BREAKING CHANGES 77 | 78 | * add Terraform 0.13 constraint and module attribution (#24) 79 | 80 | ### Features 81 | 82 | * add Terraform 0.13 constraint and module attribution ([#24](https://www.github.com/terraform-google-modules/terraform-google-dataflow/issues/24)) ([c90fe5c](https://www.github.com/terraform-google-modules/terraform-google-dataflow/commit/c90fe5c86a440c1e92614c466a77709dd4e3b261)) 83 | * Allow dataflow to accept full self_link for working with shared vpc networks. ([#18](https://www.github.com/terraform-google-modules/terraform-google-dataflow/issues/18)) ([4194dea](https://www.github.com/terraform-google-modules/terraform-google-dataflow/commit/4194dea146a1dc8483157d03acbc44e9d122b6bd)) 84 | 85 | 86 | ### Miscellaneous Chores 87 | 88 | * release 2.0.0 ([db1f0ca](https://www.github.com/terraform-google-modules/terraform-google-dataflow/commit/db1f0ca715c09e56e8676e8712c28941b191a685)) 89 | 90 | ## [Unreleased] 91 | 92 | ## [1.0.0] 93 | 94 | ### Changed 95 | 96 | - Supported version of Terraform is 0.12. [#8] 97 | 98 | ## [0.3.0] - 2019-06-19 99 | 100 | ### Added 101 | 102 | * Add zone argument [#5] 103 | 104 | ## [0.2.0] - 2019-06-19 105 | 106 | ### Added 107 | 108 | * Add network, subnetwork and machine_type arguments [#4] 109 | 110 | ## v0.1.0 2019-04-05 111 | 112 | ### Added 113 | 114 | * Initial release of module. 
115 | 116 | [Unreleased]: https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v1.0.0...HEAD 117 | [1.0.0]: https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v0.3.0...v1.0.0 118 | [0.3.0]: https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v0.2.0...v0.3.0 119 | [0.2.0]: https://github.com/terraform-google-modules/terraform-google-dataflow/compare/v0.1.0...v0.2.0 120 | 121 | [#8]: https://github.com/terraform-google-modules/terraform-google-dataflow/pull/8 122 | [#5]: https://github.com/terraform-google-modules/terraform-google-dataflow/pull/5 123 | [#4]: https://github.com/terraform-google-modules/terraform-google-dataflow/pull/4 124 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # NOTE: This file is automatically generated from values at: 2 | # https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/main/infra/terraform/test-org/org/locals.tf 3 | 4 | * @terraform-google-modules/cft-admins @ayushmjain @q2w 5 | 6 | # NOTE: GitHub CODEOWNERS locations: 7 | # https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-and-branch-protection 8 | 9 | CODEOWNERS @terraform-google-modules/cft-admins 10 | .github/CODEOWNERS @terraform-google-modules/cft-admins 11 | docs/CODEOWNERS @terraform-google-modules/cft-admins 12 | 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This document provides guidelines for contributing to the module. 4 | 5 | ## Dependencies 6 | 7 | The following dependencies must be installed on the development system: 8 | 9 | - [Docker Engine][docker-engine] 10 | - [Google Cloud SDK][google-cloud-sdk] 11 | - [make] 12 | 13 | ## Generating Documentation for Inputs and Outputs 14 | 15 | The Inputs and Outputs tables in the READMEs of the root module, 16 | submodules, and example modules are automatically generated based on 17 | the `variables` and `outputs` of the respective modules. These tables 18 | must be refreshed if the module interfaces are changed. 19 | 20 | ### Execution 21 | 22 | Run `make generate_docs` to generate new Inputs and Outputs tables. 23 | 24 | ## Integration Testing 25 | 26 | Integration tests are used to verify the behaviour of the root module, 27 | submodules, and example modules. Additions, changes, and fixes should 28 | be accompanied by tests. 29 | 30 | The integration tests are run using [Kitchen][kitchen], 31 | [Kitchen-Terraform][kitchen-terraform], and [InSpec][inspec]. These 32 | tools are packaged within a Docker image for convenience. 33 | 34 | The general strategy for these tests is to verify the behaviour of the 35 | [example modules](./examples/), thus ensuring that the root module, 36 | submodules, and example modules are all functionally correct. 37 | 38 | ### Test Environment 39 | The easiest way to test the module is in an isolated test project. The setup for such a project is defined in the [test/setup](./test/setup/) directory. 40 | 41 | To use this setup, you need a service account with Project Creator access on a folder.
Export the Service Account credentials to your environment like so: 42 | 43 | ``` 44 | export SERVICE_ACCOUNT_JSON=$(< credentials.json) 45 | ``` 46 | 47 | You will also need to set a few environment variables: 48 | ``` 49 | export TF_VAR_org_id="your_org_id" 50 | export TF_VAR_folder_id="your_folder_id" 51 | export TF_VAR_billing_account="your_billing_account_id" 52 | ``` 53 | 54 | With these settings in place, you can prepare a test project using Docker: 55 | ``` 56 | make docker_test_prepare 57 | ``` 58 | 59 | ### Noninteractive Execution 60 | 61 | Run `make docker_test_integration` to test all of the example modules 62 | noninteractively, using the prepared test project. 63 | 64 | ### Interactive Execution 65 | 66 | 1. Run `make docker_run` to start the testing Docker container in 67 | interactive mode. 68 | 69 | 1. Run `kitchen_do create <EXAMPLE_NAME>` to initialize the working 70 | directory for an example module. 71 | 72 | 1. Run `kitchen_do converge <EXAMPLE_NAME>` to apply the example module. 73 | 74 | 1. Run `kitchen_do verify <EXAMPLE_NAME>` to test the example module. 75 | 76 | 1. Run `kitchen_do destroy <EXAMPLE_NAME>` to destroy the example module 77 | state. 78 | 79 | ## Linting and Formatting 80 | 81 | Many of the files in the repository can be linted or formatted to 82 | maintain a standard of quality. 83 | 84 | ### Execution 85 | 86 | Run `make docker_test_lint`. 87 | 88 | [docker-engine]: https://www.docker.com/products/docker-engine 89 | [flake8]: http://flake8.pycqa.org/en/latest/ 90 | [gofmt]: https://golang.org/cmd/gofmt/ 91 | [google-cloud-sdk]: https://cloud.google.com/sdk/install 92 | [hadolint]: https://github.com/hadolint/hadolint 93 | [inspec]: https://inspec.io/ 94 | [kitchen-terraform]: https://github.com/newcontext-oss/kitchen-terraform 95 | [kitchen]: https://kitchen.ci/ 96 | [make]: https://en.wikipedia.org/wiki/Make_(software) 97 | [shellcheck]: https://www.shellcheck.net/ 98 | [terraform-docs]: https://github.com/segmentio/terraform-docs 99 | [terraform]: https://terraform.io/ 100 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files.
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Please note that this file was generated from [terraform-google-module-template](https://github.com/terraform-google-modules/terraform-google-module-template). 16 | # Please make sure to contribute relevant changes upstream! 17 | 18 | # Make will use bash instead of sh 19 | SHELL := /usr/bin/env bash 20 | 21 | DOCKER_TAG_VERSION_DEVELOPER_TOOLS := 1.22 22 | DOCKER_IMAGE_DEVELOPER_TOOLS := cft/developer-tools 23 | REGISTRY_URL := gcr.io/cloud-foundation-cicd 24 | 25 | # Enter docker container for local development 26 | .PHONY: docker_run 27 | docker_run: 28 | docker run --rm -it \ 29 | -e SERVICE_ACCOUNT_JSON \ 30 | -v "$(CURDIR)":/workspace \ 31 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 32 | /bin/bash 33 | 34 | # Execute prepare tests within the docker container 35 | .PHONY: docker_test_prepare 36 | docker_test_prepare: 37 | docker run --rm -it \ 38 | -e SERVICE_ACCOUNT_JSON \ 39 | -e TF_VAR_org_id \ 40 | -e TF_VAR_folder_id \ 41 | -e TF_VAR_billing_account \ 42 | -v "$(CURDIR)":/workspace \ 43 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 44 | /usr/local/bin/execute_with_credentials.sh prepare_environment 45 | 46 | # Clean up test environment within the docker container 47 | .PHONY: docker_test_cleanup 48 | docker_test_cleanup: 49 | docker run --rm -it \ 50 | -e SERVICE_ACCOUNT_JSON \ 51 | -e TF_VAR_org_id \ 52 | -e TF_VAR_folder_id \ 53 | -e TF_VAR_billing_account \ 54 | -v "$(CURDIR)":/workspace \ 55 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 56 | /usr/local/bin/execute_with_credentials.sh cleanup_environment 57 | 58 | # Execute integration tests within the docker container 59 | .PHONY: docker_test_integration 60 | docker_test_integration: 61 | docker run --rm -it \ 62 | -e SERVICE_ACCOUNT_JSON \ 63 | -v "$(CURDIR)":/workspace \ 64 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 65 | /usr/local/bin/test_integration.sh 66 | 67 | # Execute lint tests within the docker container 68 | .PHONY: docker_test_lint 69 | docker_test_lint: 70 | docker run --rm -it \ 71 | -e ENABLE_BPMETADATA=1 \ 72 | -v "$(CURDIR)":/workspace \ 73 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 74 | /usr/local/bin/test_lint.sh 75 | 76 | # Generate documentation 77 | .PHONY: docker_generate_docs 78 | docker_generate_docs: 79 | docker run --rm -it \ 80 | -e ENABLE_BPMETADATA=1 \ 81 | -v "$(CURDIR)":/workspace \ 82 | $(REGISTRY_URL)/${DOCKER_IMAGE_DEVELOPER_TOOLS}:${DOCKER_TAG_VERSION_DEVELOPER_TOOLS} \ 83 | /bin/bash -c 'source /usr/local/bin/task_helper_functions.sh && generate_docs' 84 | 85 | # 
Alias for backwards compatibility 86 | .PHONY: generate_docs 87 | generate_docs: docker_generate_docs 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Google Dataflow Terraform Modules](https://registry.terraform.io/modules/terraform-google-modules/dataflow/google) 2 | 3 | This module handles opinionated Dataflow job configuration and deployments. Use 4 | the submodules under the `/modules/` directory. 5 | 6 | 7 | -------------------------------------------------------------------------------- /build/int.cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | timeout: 3600s 16 | steps: 17 | - id: swap-module-refs 18 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 19 | args: ['module-swapper'] 20 | - id: prepare 21 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 22 | args: ['/bin/bash', '-c', 'source /usr/local/bin/task_helper_functions.sh && prepare_environment'] 23 | env: 24 | - 'TF_VAR_org_id=$_ORG_ID' 25 | - 'TF_VAR_folder_id=$_FOLDER_ID' 26 | - 'TF_VAR_billing_account=$_BILLING_ACCOUNT' 27 | - id: create 28 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 29 | args: ['/bin/bash', '-c', 'source /usr/local/bin/task_helper_functions.sh && kitchen_do create'] 30 | - id: converge 31 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 32 | args: ['/bin/bash', '-c', 'source /usr/local/bin/task_helper_functions.sh && kitchen_do converge'] 33 | - id: verify 34 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 35 | args: ['/bin/bash', '-c', 'source /usr/local/bin/task_helper_functions.sh && kitchen_do verify'] 36 | - id: destroy 37 | name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 38 | args: ['/bin/bash', '-c', 'source /usr/local/bin/task_helper_functions.sh && kitchen_do destroy'] 39 | tags: 40 | - 'ci' 41 | - 'integration' 42 | substitutions: 43 | _DOCKER_IMAGE_DEVELOPER_TOOLS: 'cft/developer-tools' 44 | _DOCKER_TAG_VERSION_DEVELOPER_TOOLS: '1.22' 45 | -------------------------------------------------------------------------------- /build/lint.cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | - name: 'gcr.io/cloud-foundation-cicd/$_DOCKER_IMAGE_DEVELOPER_TOOLS:$_DOCKER_TAG_VERSION_DEVELOPER_TOOLS' 17 | id: 'lint' 18 | args: ['/usr/local/bin/test_lint.sh'] 19 | tags: 20 | - 'ci' 21 | - 'lint' 22 | substitutions: 23 | _DOCKER_IMAGE_DEVELOPER_TOOLS: 'cft/developer-tools' 24 | _DOCKER_TAG_VERSION_DEVELOPER_TOOLS: '1.22' 25 | -------------------------------------------------------------------------------- /doc/upgrading_to_v3.0.md: -------------------------------------------------------------------------------- 1 | # Upgrading to v3.0 2 | The Dataflow legacy template job module has been moved to `/modules/legacy`. 3 | 4 | ```diff 5 | module "dataflow-job" { 6 | - source = "terraform-google-modules/dataflow/google" 7 | + source = "terraform-google-modules/dataflow/google//modules/legacy" 8 | ... 9 | } 10 | ``` 11 | 12 | In addition, the following variables have changed: 13 | - Added `enable_streaming_engine` and `skip_wait_on_job_termination` 14 | - Changed `ip_configuration` to `use_public_ips` 15 | - Renamed `network_self_link` to `network_name` 16 | - Renamed `subnetwork_self_link` to `subnetwork` 17 | - Removed `zone` 18 | ```diff 19 | module "dataflow-job" { 20 | ... 21 | - ip_configuration = "WORKER_IP_PUBLIC" 22 | + use_public_ips = true 23 | - network_self_link = "default" 24 | + network_name = "default" 25 | - subnetwork_self_link = "regions/us-central1/subnetworks/dataflow-pipeline" 26 | + subnetwork = "regions/us-central1/subnetworks/dataflow-pipeline" 27 | - zone = "us-central1-a" 28 | ... 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /examples/dlp_api_example/README.md: -------------------------------------------------------------------------------- 1 | # DLP API Example 2 | 3 | This Dataflow example runs the DLP Dataflow template at gs://dataflow-templates/latest/Stream_DLP_GCS_Text_to_BigQuery. It downloads a fake credit card [zipfile](http://eforexcel.com/wp/wp-content/uploads/2017/07/1500000%20CC%20Records.zip), unzips it to a CSV, deidentifies the credit card number and PIN columns using the DLP API, and loads the data into a BigQuery dataset. 4 | 5 | This Terraform script lets users supply their own pre-created KMS key ring, key, and wrapped key by setting the variable `create_key_ring=false`, or it can create all of these resources for them by setting the variable `create_key_ring=true`. 6 | 7 | 8 | ## Best practices 9 | 10 | ### Cost and Performance 11 | As featured in this example, using a single regional bucket to store your jobs' temporary data is recommended to optimize cost. 12 | Also, to optimize your jobs' performance, this bucket should always be in the same region as the zones in which your jobs are running.
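For illustration, a minimal sketch of this co-location pattern might look like the following, reusing the `dataflow_bucket` and `legacy` submodules from this repository; the bucket and job names are hypothetical, and `var.project_id` and `var.region` are assumed to be declared as in this example:

```hcl
# Minimal sketch: the temp bucket and the job share var.region, so
# workers never read or write temporary data across regions.
module "dataflow-bucket" {
  source  = "terraform-google-modules/dataflow/google//modules/dataflow_bucket"
  version = "~> 2.0"

  name       = "my-dataflow-tmp-bucket" # hypothetical bucket name
  region     = var.region
  project_id = var.project_id
}

module "dataflow-job" {
  source  = "terraform-google-modules/dataflow/google//modules/legacy"
  version = "~> 2.0"

  project_id        = var.project_id
  name              = "my-dataflow-job" # hypothetical job name
  region            = var.region # same region as the bucket above
  template_gcs_path = "gs://dataflow-templates/latest/Word_Count"
  temp_gcs_location = module.dataflow-bucket.name
  on_delete         = "cancel"
}
```

Because `temp_gcs_location` points at the module-managed bucket, the job's temporary files stay in the same region as its workers.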
13 | ### Permissions 14 | Make sure the Terraform service account used to execute the example has the basic permissions needed for the module, listed [here](../../README#configure-a-service-account-to-execute-the-module). 15 | Grant the service account these additional permissions needed to run the example: 16 | - roles/bigquery.admin 17 | - roles/iam.serviceAccountUser 18 | - roles/storage.admin 19 | - roles/cloudkms.admin 20 | - roles/dlp.admin 21 | - roles/cloudkms.cryptoKeyEncrypterDecrypter 22 | 23 | ### Controller Service Account 24 | This example features the use of a controller service account, which is specified with the `service_account_email` input variable. 25 | We recommend using a custom service account with fine-grained access control to mitigate security risks. See more about controller service accounts [here](https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#controller_service_account). 26 | 27 | To execute this module, your Controller Service Account needs the following project roles: 28 | - roles/dataflow.worker 29 | - roles/storage.admin 30 | - roles/bigquery.admin 31 | - roles/cloudkms.admin 32 | - roles/dlp.admin 33 | - roles/cloudkms.cryptoKeyEncrypterDecrypter 34 | 35 | ### GCloud 36 | This example uses gcloud shell commands to create a wrapped key and download the sample credit card data. Please ensure that you have gcloud [installed](https://cloud.google.com/sdk/install), are authenticated using `gcloud init`, and have set the project with `gcloud config set project my-project`. You may need to enable the following APIs (see [here](https://cloud.google.com/apis/docs/enable-disable-apis)): 37 | - Cloud Key Management Service (KMS) API: `cloudkms.googleapis.com` 38 | - Cloud Storage API: `storage-component.googleapis.com` 39 | - DLP API: `dlp.googleapis.com` 40 | 41 | 42 | [^]: (autogen_docs_start) 43 | 44 | ## Inputs 45 | 46 | | Name | Description | Type | Default | Required | 47 | |------|-------------|:----:|:-----:|:-----:| 48 | | project\_id | The project ID to deploy to | string | n/a | yes | 49 | | region | The region in which the bucket and the dataflow job will be deployed | string | n/a | yes | 50 | | service\_account\_email | The Service Account email used to create the job. | string | n/a | yes | 51 | | key\_ring | The KMS key ring used to create a wrapped key (can be existing or created) | string | n/a | yes | 52 | | kms\_key\_name | The KMS key within the key ring used to create a wrapped key (can be existing or created) | string | n/a | yes | 53 | | wrapped\_key | The wrapped key generated from KMS used to encrypt sensitive information (leave blank if generating from terraform) | string | "" | yes | 54 | | create\_key\_ring | Boolean for creating own KMS key ring/key or using pre-created resource | string | "true" | yes | 55 | 56 | ## Outputs 57 | 58 | | Name | Description | 59 | |------|-------------| 60 | | bucket\_name | The name of the bucket | 61 | | df\_job\_id | The unique Id of the newly created Dataflow job | 62 | | df\_job\_name | The name of the newly created Dataflow job | 63 | | df\_job\_state | The state of the newly created Dataflow job | 64 | | project\_id | The project's ID | 65 | 66 | [^]: (autogen_docs_end) 67 | 68 | To provision this example, run the following from within this directory: 69 | - `terraform init` to get the plugins 70 | - `terraform plan` to see the infrastructure plan 71 | - `terraform apply` to apply the infrastructure build 72 | - `terraform destroy` to destroy the built infrastructure.
(Note that KMS key rings and crypto keys cannot be destroyed!) 73 | 74 | 75 | ## Inputs 76 | 77 | | Name | Description | Type | Default | Required | 78 | |------|-------------|------|---------|:--------:| 79 | | create\_key\_ring | Boolean for determining whether to create key ring with keys(true or false) | `bool` | `true` | no | 80 | | key\_ring | The GCP KMS key ring to be created | `string` | n/a | yes | 81 | | kms\_key\_name | The GCP KMS key to be created going under the key ring | `string` | n/a | yes | 82 | | project\_id | The project ID to deploy to | `string` | n/a | yes | 83 | | region | The region in which the bucket and the dataflow job will be deployed | `string` | `"us-central1"` | no | 84 | | service\_account\_email | The Service Account email used to create the job. | `string` | n/a | yes | 85 | | terraform\_service\_account\_email | The Service Account email used by terraform to spin up resources- the one from environmental variable GOOGLE\_APPLICATION\_CREDENTIALS | `string` | n/a | yes | 86 | | wrapped\_key | Wrapped key from KMS leave blank if create\_key\_ring=true | `string` | `""` | no | 87 | 88 | ## Outputs 89 | 90 | | Name | Description | 91 | |------|-------------| 92 | | bucket\_name | The name of the bucket | 93 | | df\_job\_id | The unique Id of the newly created Dataflow job | 94 | | df\_job\_name | The name of the newly created Dataflow job | 95 | | df\_job\_state | The state of the newly created Dataflow job | 96 | | project\_id | The project's ID | 97 | 98 | 99 | -------------------------------------------------------------------------------- /examples/dlp_api_example/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | provider "google" { 18 | region = var.region 19 | } 20 | 21 | resource "random_id" "random_suffix" { 22 | byte_length = 4 23 | } 24 | 25 | locals { 26 | gcs_bucket_name = "tmp-dir-bucket-${random_id.random_suffix.hex}" 27 | } 28 | 29 | module "dataflow-bucket" { 30 | source = "terraform-google-modules/dataflow/google//modules/dataflow_bucket" 31 | version = "~> 2.0" 32 | 33 | name = local.gcs_bucket_name 34 | region = var.region 35 | project_id = var.project_id 36 | } 37 | 38 | resource "null_resource" "download_sample_cc_into_gcs" { 39 | provisioner "local-exec" { 40 | command = < cc_records.zip 42 | unzip cc_records.zip 43 | rm cc_records.zip 44 | mv 1500000\ CC\ Records.csv cc_records.csv 45 | gsutil cp cc_records.csv gs://${module.dataflow-bucket.name} 46 | rm cc_records.csv 47 | EOF 48 | 49 | } 50 | } 51 | 52 | resource "null_resource" "deinspection_template_setup" { 53 | provisioner "local-exec" { 54 | command = <> original_key.txt 103 | original_key="$(cat original_key.txt)" 104 | gcloud kms keys add-iam-policy-binding ${var.kms_key_name} --project ${var.project_id} --location global --keyring ${var.key_ring} --member serviceAccount:${var.terraform_service_account_email} --role roles/cloudkms.cryptoKeyEncrypterDecrypter 105 | curl -s -X POST "https://cloudkms.googleapis.com/v1/projects/${var.project_id}/locations/global/keyRings/${var.key_ring}/cryptoKeys/${var.kms_key_name}:encrypt" -d '{"plaintext":"'$original_key'"}' -H "Authorization:Bearer $(gcloud auth application-default print-access-token)" -H "Content-Type:application/json" | python -c "import sys, json; print(json.load(sys.stdin)['ciphertext'])" >> wrapped_key.txt 106 | EOF 107 | 108 | } 109 | } 110 | 111 | module "dataflow-job" { 112 | source = "terraform-google-modules/dataflow/google//modules/legacy" 113 | version = "~> 2.0" 114 | 115 | project_id = var.project_id 116 | name = "dlp_example_${null_resource.download_sample_cc_into_gcs.id}_${null_resource.deinspection_template_setup.id}" 117 | on_delete = "cancel" 118 | region = var.region 119 | template_gcs_path = "gs://dataflow-templates/latest/Stream_DLP_GCS_Text_to_BigQuery" 120 | temp_gcs_location = module.dataflow-bucket.name 121 | service_account_email = var.service_account_email 122 | max_workers = 5 123 | 124 | parameters = { 125 | inputFilePattern = "gs://${module.dataflow-bucket.name}/cc_records.csv" 126 | datasetName = google_bigquery_dataset.default.dataset_id 127 | batchSize = 1000 128 | dlpProjectId = var.project_id 129 | deidentifyTemplateName = "projects/${var.project_id}/deidentifyTemplates/15" 130 | } 131 | 132 | labels = { 133 | example_name = "dlp_api_example" 134 | } 135 | } 136 | 137 | resource "null_resource" "destroy_deidentify_template" { 138 | triggers = { 139 | project_id = var.project_id 140 | } 141 | 142 | provisioner "local-exec" { 143 | when = destroy 144 | command = < 57 | ## Inputs 58 | 59 | | Name | Description | Type | Default | Required | 60 | |------|-------------|------|---------|:--------:| 61 | | force\_destroy | When deleting a bucket, this boolean option will delete all contained objects. If you try to delete a bucket that contains objects, Terraform will fail that run. | `bool` | `false` | no | 62 | | project\_id | The project ID to deploy to | `string` | n/a | yes | 63 | | region | The region in which the bucket will be deployed | `string` | n/a | yes | 64 | | service\_account\_email | The Service Account email used to create the job. 
| `string` | n/a | yes | 65 | 66 | ## Outputs 67 | 68 | | Name | Description | 69 | |------|-------------| 70 | | bucket\_name | The name of the bucket | 71 | | df\_job\_id | The unique Id of the newly created Dataflow job | 72 | | df\_job\_id\_2 | The unique Id of the newly created Dataflow job | 73 | | df\_job\_name | The name of the newly created Dataflow job | 74 | | df\_job\_name\_2 | The name of the newly created Dataflow job | 75 | | df\_job\_state | The state of the newly created Dataflow job | 76 | | df\_job\_state\_2 | The state of the newly created Dataflow job | 77 | | project\_id | The project's ID | 78 | 79 | 80 | -------------------------------------------------------------------------------- /examples/simple_example/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | provider "google" { 18 | region = var.region 19 | } 20 | 21 | resource "random_id" "random_suffix" { 22 | byte_length = 4 23 | } 24 | 25 | locals { 26 | gcs_bucket_name = "tmp-dir-bucket-${random_id.random_suffix.hex}" 27 | } 28 | 29 | module "vpc" { 30 | source = "terraform-google-modules/network/google" 31 | version = "~> 10.0" 32 | project_id = var.project_id 33 | network_name = "dataflow-network" 34 | 35 | subnets = [ 36 | { 37 | subnet_name = "dataflow-subnetwork" 38 | subnet_ip = "10.1.3.0/24" 39 | subnet_region = "us-central1" 40 | }, 41 | ] 42 | 43 | secondary_ranges = { 44 | dataflow-subnetwork = [ 45 | { 46 | range_name = "my-secondary-range" 47 | ip_cidr_range = "192.168.64.0/24" 48 | }, 49 | ] 50 | } 51 | } 52 | 53 | module "dataflow-bucket" { 54 | source = "terraform-google-modules/dataflow/google//modules/dataflow_bucket" 55 | version = "~> 2.0" 56 | 57 | name = local.gcs_bucket_name 58 | region = var.region 59 | project_id = var.project_id 60 | force_destroy = var.force_destroy 61 | } 62 | 63 | module "dataflow-job" { 64 | source = "terraform-google-modules/dataflow/google//modules/legacy" 65 | version = "~> 2.0" 66 | 67 | project_id = var.project_id 68 | name = "wordcount-terraform-example" 69 | on_delete = "cancel" 70 | region = var.region 71 | max_workers = 1 72 | template_gcs_path = "gs://dataflow-templates/latest/Word_Count" 73 | temp_gcs_location = module.dataflow-bucket.name 74 | service_account_email = var.service_account_email 75 | network_name = module.vpc.network_self_link 76 | subnetwork = module.vpc.subnets_self_links[0] 77 | machine_type = "n1-standard-1" 78 | 79 | parameters = { 80 | inputFile = "gs://dataflow-samples/shakespeare/kinglear.txt" 81 | output = "gs://${local.gcs_bucket_name}/output/my_output" 82 | } 83 | } 84 | 85 | module "dataflow-job-2" { 86 | source = "terraform-google-modules/dataflow/google//modules/legacy" 87 | version = "~> 2.0" 88 | 89 | project_id = var.project_id 90 | name = "wordcount-terraform-example-2" 91 | on_delete = "cancel" 92 | region = var.region 93 | max_workers = 1 94 
| template_gcs_path = "gs://dataflow-templates/latest/Word_Count" 95 | temp_gcs_location = module.dataflow-bucket.name 96 | service_account_email = var.service_account_email 97 | network_name = module.vpc.network_self_link 98 | subnetwork = module.vpc.subnets_self_links[0] 99 | machine_type = "n1-standard-2" 100 | 101 | parameters = { 102 | inputFile = "gs://dataflow-samples/shakespeare/kinglear.txt" 103 | output = "gs://${local.gcs_bucket_name}/output/my_output" 104 | } 105 | 106 | labels = { 107 | example_name = "simple_example" 108 | } 109 | } 110 | 111 | -------------------------------------------------------------------------------- /examples/simple_example/outputs.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | output "project_id" { 18 | value = var.project_id 19 | description = "The project's ID" 20 | } 21 | 22 | output "df_job_state" { 23 | description = "The state of the newly created Dataflow job" 24 | value = module.dataflow-job.state 25 | } 26 | 27 | output "df_job_id" { 28 | description = "The unique Id of the newly created Dataflow job" 29 | value = module.dataflow-job.id 30 | } 31 | 32 | output "df_job_name" { 33 | description = "The name of the newly created Dataflow job" 34 | value = module.dataflow-job.name 35 | } 36 | 37 | output "df_job_state_2" { 38 | description = "The state of the newly created Dataflow job" 39 | value = module.dataflow-job-2.state 40 | } 41 | 42 | output "df_job_id_2" { 43 | description = "The unique Id of the newly created Dataflow job" 44 | value = module.dataflow-job-2.id 45 | } 46 | 47 | output "df_job_name_2" { 48 | description = "The name of the newly created Dataflow job" 49 | value = module.dataflow-job-2.name 50 | } 51 | 52 | output "bucket_name" { 53 | description = "The name of the bucket" 54 | value = module.dataflow-bucket.name 55 | } 56 | 57 | -------------------------------------------------------------------------------- /examples/simple_example/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | variable "project_id" { 18 | type = string 19 | description = "The project ID to deploy to" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "The region in which the bucket will be deployed" 25 | } 26 | 27 | variable "service_account_email" { 28 | type = string 29 | description = "The Service Account email used to create the job." 30 | } 31 | 32 | variable "force_destroy" { 33 | type = bool 34 | description = "When deleting a bucket, this boolean option will delete all contained objects. If you try to delete a bucket that contains objects, Terraform will fail that run." 35 | default = false 36 | } 37 | 38 | -------------------------------------------------------------------------------- /metadata.display.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: blueprints.cloud.google.com/v1alpha1 16 | kind: BlueprintMetadata 17 | metadata: 18 | name: terraform-google-dataflow-display 19 | annotations: 20 | config.kubernetes.io/local-config: "true" 21 | spec: 22 | info: 23 | title: Google Dataflow Terraform Modules 24 | source: 25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 26 | sourceType: git 27 | ui: 28 | input: {} 29 | -------------------------------------------------------------------------------- /metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | apiVersion: blueprints.cloud.google.com/v1alpha1 
16 | kind: BlueprintMetadata 
17 | metadata: 
18 | name: terraform-google-dataflow 
19 | annotations: 
20 | config.kubernetes.io/local-config: "true" 
21 | spec: 
22 | info: 
23 | title: Google Dataflow Terraform Modules 
24 | source: 
25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 
26 | sourceType: git 
27 | description: {} 
28 | content: 
29 | subBlueprints: 
30 | - name: dataflow_bucket 
31 | location: modules/dataflow_bucket 
32 | - name: flex 
33 | location: modules/flex 
34 | - name: legacy 
35 | location: modules/legacy 
36 | examples: 
37 | - name: dlp_api_example 
38 | location: examples/dlp_api_example 
39 | - name: simple_example 
40 | location: examples/simple_example 
41 | interfaces: {} 
42 | requirements: 
43 | roles: 
44 | - level: Project 
45 | roles: 
46 | - roles/owner 
47 | - roles/dataflow.admin 
48 | services: 
49 | - cloudresourcemanager.googleapis.com 
50 | - storage-api.googleapis.com 
51 | - serviceusage.googleapis.com 
52 | - dataflow.googleapis.com 
53 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/README.md: -------------------------------------------------------------------------------- 
1 | # Terraform Google Dataflow Bucket Submodule 
2 | 
3 | This submodule helps create and deploy the GCS bucket used to store temporary job data. 
4 | 
5 | ## Constants 
6 | The bucket is forced to be regional to optimize cost. 
7 | It also does not use the `lifecycle_rule` or `force_destroy` options, to prevent destroying temporary data while a job is running. 
8 | 
9 | 
10 | ## Usage 
11 | You may use this bucket for a single job or for multiple jobs. 
12 | If deploying multiple jobs in a single region, the best practice is to use a single bucket for temporary job data. 
13 | Applying this best practice will optimize your jobs' performance. 
14 | See [here](../../examples/simple_example) for a multi-job example. 
15 | 
16 | 
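A minimal invocation is sketched below; the bucket name and project ID are placeholders, and the version constraint assumes the current 3.x release line:

```hcl
module "dataflow-bucket" {
  source  = "terraform-google-modules/dataflow/google//modules/dataflow_bucket"
  version = "~> 3.0"

  name       = "my-dataflow-tmp-bucket" # placeholder
  region     = "us-central1"            # keep this the same as your jobs' region
  project_id = "my-project-id"          # placeholder
}
```

The `name` output can then be passed to a job module's `temp_gcs_location` (legacy) or used to build a `temp_location` URL (flex).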
38 | ## Tests 
39 | 
40 | The integration test [here](../../test/integration/simple_example/controls/gcloud.rb) checks that the lifecycle rule is not enabled. 
41 | This test also implicitly checks whether the bucket was successfully created. 
42 | 
43 | 
44 | ## Inputs 
45 | 
46 | | Name | Description | Type | Default | Required | 
47 | |------|-------------|------|---------|:--------:| 
48 | | force\_destroy | When deleting a bucket, this boolean option will delete all contained objects. If you try to delete a bucket that contains objects, Terraform will fail that run. | `bool` | `false` | no | 
49 | | name | The name of the bucket. | `string` | n/a | yes | 
50 | | project\_id | The project\_id to deploy the example instance into. (e.g. "simple-sample-project-1234") | `string` | n/a | yes | 
51 | | region | The GCS bucket region. This should be the same as your dataflow job's zone to optimize performance. | `string` | `"us-central1"` | no | 
52 | 
53 | ## Outputs 
54 | 
55 | | Name | Description | 
56 | |------|-------------| 
57 | | name | The name of the bucket | 
58 | | region | The bucket's region location | 
59 | 
60 | 
61 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/main.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2019 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 
16 | 
17 | resource "google_storage_bucket" "tmp_dir_bucket" { 
18 | name = var.name 
19 | location = var.region 
20 | storage_class = "REGIONAL" 
21 | project = var.project_id 
22 | force_destroy = var.force_destroy 
23 | } 
24 | 
25 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/metadata.display.yaml: -------------------------------------------------------------------------------- 
1 | # Copyright 2025 Google LLC 
2 | # 
3 | # Licensed under the Apache License, Version 2.0 (the "License"); 
4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 
6 | # 
7 | # http://www.apache.org/licenses/LICENSE-2.0 
8 | # 
9 | # Unless required by applicable law or agreed to in writing, software 
10 | # distributed under the License is distributed on an "AS IS" BASIS, 
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 
13 | # limitations under the License. 
14 | 
15 | apiVersion: blueprints.cloud.google.com/v1alpha1 
16 | kind: BlueprintMetadata 
17 | metadata: 
18 | name: terraform-google-dataflow-dataflow-bucket-display 
19 | annotations: 
20 | config.kubernetes.io/local-config: "true" 
21 | spec: 
22 | info: 
23 | title: Terraform Google Dataflow Bucket Submodule 
24 | source: 
25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 
26 | sourceType: git 
27 | dir: /modules/dataflow_bucket 
28 | ui: 
29 | input: 
30 | variables: 
31 | force_destroy: 
32 | name: force_destroy 
33 | title: Force Destroy 
34 | name: 
35 | name: name 
36 | title: Name 
37 | project_id: 
38 | name: project_id 
39 | title: Project Id 
40 | region: 
41 | name: region 
42 | title: Region 
43 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/metadata.yaml: -------------------------------------------------------------------------------- 
1 | # Copyright 2025 Google LLC 
2 | # 
3 | # Licensed under the Apache License, Version 2.0 (the "License"); 
4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 
6 | # 
7 | # http://www.apache.org/licenses/LICENSE-2.0 
8 | # 
9 | # Unless required by applicable law or agreed to in writing, software 
10 | # distributed under the License is distributed on an "AS IS" BASIS, 
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 
13 | # limitations under the License. 
14 | 
15 | apiVersion: blueprints.cloud.google.com/v1alpha1 
16 | kind: BlueprintMetadata 
17 | metadata: 
18 | name: terraform-google-dataflow-dataflow-bucket 
19 | annotations: 
20 | config.kubernetes.io/local-config: "true" 
21 | spec: 
22 | info: 
23 | title: Terraform Google Dataflow Bucket Submodule 
24 | source: 
25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 
26 | sourceType: git 
27 | dir: /modules/dataflow_bucket 
28 | version: 3.0.2 
29 | actuationTool: 
30 | flavor: Terraform 
31 | version: ">= 1.3" 
32 | description: {} 
33 | content: 
34 | examples: 
35 | - name: dlp_api_example 
36 | location: examples/dlp_api_example 
37 | - name: simple_example 
38 | location: examples/simple_example 
39 | interfaces: 
40 | variables: 
41 | - name: project_id 
42 | description: The project_id to deploy the example instance into. (e.g. "simple-sample-project-1234") 
43 | varType: string 
44 | required: true 
45 | - name: region 
46 | description: The GCS bucket region. This should be the same as your dataflow job's zone to optimize performance. 
47 | varType: string 
48 | defaultValue: us-central1 
49 | - name: name 
50 | description: The name of the bucket. 
51 | varType: string 
52 | required: true 
53 | - name: force_destroy 
54 | description: When deleting a bucket, this boolean option will delete all contained objects. If you try to delete a bucket that contains objects, Terraform will fail that run. 
55 | varType: bool 
56 | defaultValue: false 
57 | outputs: 
58 | - name: name 
59 | description: The name of the bucket 
60 | type: string 
61 | - name: region 
62 | description: The bucket's region location 
63 | type: string 
64 | requirements: 
65 | roles: 
66 | - level: Project 
67 | roles: 
68 | - roles/dataflow.admin 
69 | - roles/dataflow.worker 
70 | - roles/storage.admin 
71 | - roles/compute.networkAdmin 
72 | - roles/pubsub.editor 
73 | - roles/bigquery.dataEditor 
74 | - roles/artifactregistry.writer 
75 | - roles/iam.serviceAccountUser 
76 | - roles/resourcemanager.projectIamAdmin 
77 | services: 
78 | - cloudresourcemanager.googleapis.com 
79 | - storage-api.googleapis.com 
80 | - serviceusage.googleapis.com 
81 | - dataflow.googleapis.com 
82 | providerVersions: 
83 | - source: hashicorp/google-beta 
84 | version: ">= 3.53, < 7" 
85 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/outputs.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2019 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 
16 | 
17 | output "name" { 
18 | description = "The name of the bucket" 
19 | value = var.name 
20 | } 
21 | 
22 | output "region" { 
23 | description = "The bucket's region location" 
24 | value = var.region 
25 | } 
26 | 
27 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/variables.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2019 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 
16 | 
17 | variable "project_id" { 
18 | type = string 
19 | description = "The project_id to deploy the example instance into. (e.g. \"simple-sample-project-1234\")" 
20 | } 
21 | 
22 | variable "region" { 
23 | type = string 
24 | description = "The GCS bucket region. This should be the same as your dataflow job's zone to optimize performance." 
25 | default = "us-central1" 
26 | } 
27 | 
28 | variable "name" { 
29 | type = string 
30 | description = "The name of the bucket." 
31 | } 
32 | 
33 | variable "force_destroy" { 
34 | type = bool 
35 | description = "When deleting a bucket, this boolean option will delete all contained objects. If you try to delete a bucket that contains objects, Terraform will fail that run." 
36 | default = false 
37 | } 
38 | 
39 | 
-------------------------------------------------------------------------------- /modules/dataflow_bucket/versions.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2021 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 
16 | 
17 | terraform { 
18 | required_version = ">= 1.3" 
19 | required_providers { 
20 | 
21 | google = { 
22 | source = "hashicorp/google" 
23 | version = ">= 3.53, < 7" 
24 | } 
25 | } 
26 | 
27 | provider_meta "google" { 
28 | module_name = "blueprints/terraform/terraform-google-dataflow:dataflow_bucket/v3.0.2" 
29 | } 
30 | 
31 | } 
32 | 
33 | 
-------------------------------------------------------------------------------- /modules/flex/README.md: -------------------------------------------------------------------------------- 
1 | # [Google Dataflow Flex Template Job Terraform Module](https://registry.terraform.io/modules/terraform-google-modules/dataflow/google) 
2 | 
3 | This module handles opinionated Dataflow job configuration and deployments. 
4 | 
5 | The resources/services/activations/deletions that this module will create/trigger are: 
6 | - Create a GCS bucket for temporary job data 
7 | - Create a Dataflow job 
8 | 
9 | ## Compatibility 
10 | This module is meant for use with Terraform 0.13+ and tested using Terraform 1.0+. If you find incompatibilities using Terraform >=0.13, please open an issue. 
11 | If you haven't 
12 | [upgraded](https://www.terraform.io/upgrade-guides/0-13.html) and need a Terraform 
13 | 0.12.x-compatible version of this module, the last released version 
14 | intended for Terraform 0.12.x is [v1.0.0](https://registry.terraform.io/modules/terraform-google-modules/dataflow/google/v1.0.0). 
15 | 
16 | ## Usage 
17 | 
18 | Before using this module, one should get familiar with the `google_dataflow_job`’s [Notes on “destroy”/”apply”](https://www.terraform.io/docs/providers/google/r/dataflow_job.html#note-on-quot-destroy-quot-quot-apply-quot-) as the behavior is atypical when compared to other resources. 
19 | 
20 | ### Assumption 
21 | The module is made to be used with the container_spec_gcs_path as the staging location. 
22 | Hence, one assumption is that, before using this module, you already have a working Dataflow Flex Template staged in GCS. 
23 | 
24 | There are examples included in the [examples](../../examples/) folder, but simple usage is as follows: 
25 | 
26 | ```hcl 
27 | module "dataflow-job" { 
28 | source = "terraform-google-modules/dataflow/google//modules/flex" 
29 | version = "~> 3.0" 
30 | 
31 | project_id = "" 
32 | name = "" 
33 | on_delete = "cancel" 
34 | region = "us-central1" 
35 | max_workers = 1 
36 | container_spec_gcs_path = "gs://" 
37 | temp_location = "gs://" 
38 | } 
39 | ``` 
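The `temp_location` is typically backed by the `dataflow_bucket` submodule from this repository, as in the bundled examples; a sketch, assuming a bucket module named `dataflow-bucket` and placeholder job and template names:

```hcl
module "dataflow-flex-job" {
  source  = "terraform-google-modules/dataflow/google//modules/flex"
  version = "~> 3.0"

  project_id              = var.project_id
  name                    = "my-flex-job"                               # placeholder
  region                  = "us-central1"
  container_spec_gcs_path = "gs://my-bucket/templates/my-template.json" # placeholder
  temp_location           = "gs://${module.dataflow-bucket.name}/tmp_dir"
}
```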
54 | ## Inputs 
55 | 
56 | | Name | Description | Type | Default | Required | 
57 | |------|-------------|------|---------|:--------:| 
58 | | additional\_experiments | List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']` | `list(string)` | `[]` | no | 
59 | | autoscaling\_algorithm | The algorithm to use for autoscaling. | `string` | `null` | no | 
60 | | container\_spec\_gcs\_path | The GCS path to the Dataflow job Flex Template. | `string` | n/a | yes | 
61 | | enable\_streaming\_engine | Enable/disable the use of Streaming Engine for the job. | `bool` | `false` | no | 
62 | | kms\_key\_name | The name for the Cloud KMS key for the job. Key format is: projects/PROJECT\_ID/locations/LOCATION/keyRings/KEY\_RING/cryptoKeys/KEY | `string` | `null` | no | 
63 | | labels | User labels to be specified for the job. | `map(string)` | `{}` | no | 
64 | | launcher\_machine\_type | The machine type to use for launching the job. | `string` | `""` | no | 
65 | | machine\_type | The machine type to use for the job. | `string` | `""` | no | 
66 | | max\_workers | The number of workers permitted to work on the job. More workers may improve processing speed at additional cost. | `number` | `1` | no | 
67 | | name | The name of the dataflow job | `string` | n/a | yes | 
68 | | network\_name | The network to which VMs will be assigned. | `string` | `"default"` | no | 
69 | | on\_delete | One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel. | `string` | `"cancel"` | no | 
70 | | parameters | Key/Value pairs to be passed to the Dataflow job (as used in the template). | `map(string)` | `{}` | no | 
71 | | project\_id | The project in which the resource belongs. If it is not provided, the provider project is used. | `string` | n/a | yes | 
72 | | region | The region in which the created job should run. Also determines the location of the staging bucket if created. | `string` | `"us-central1"` | no | 
73 | | sdk\_container\_image | Docker registry location of container image to use for the worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable pipelines. | `string` | `null` | no | 
74 | | service\_account\_email | The Service Account email that will be used to identify the VMs in which the jobs are running | `string` | `""` | no | 
75 | | skip\_wait\_on\_job\_termination | If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on. | `bool` | `false` | no | 
76 | | subnetwork | The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'. | `string` | `""` | no | 
77 | | temp\_location | The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://. | `string` | `null` | no | 
78 | | use\_public\_ips | Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication. | `bool` | `false` | no | 
79 | 
80 | ## Outputs 
81 | 
82 | | Name | Description | 
83 | |------|-------------| 
84 | | container\_spec\_gcs\_path | The GCS path to the Dataflow job Flex Template. | 
85 | | id | The unique Id of the newly created Dataflow job | 
86 | | name | The name of the dataflow job | 
87 | | state | The state of the newly created Dataflow job | 
88 | | temp\_location | The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://. | 
89 | 
90 | 
91 | 
92 | ## Requirements 
93 | 
94 | Before this module can be used on a project, you must ensure that the following pre-requisites are fulfilled: 
95 | 
96 | 1. Terraform is [installed](#software-dependencies) on the machine where Terraform is executed. 
97 | 2. The Service Account you execute the module with has the right [permissions](#configure-a-service-account). 
98 | 3. The necessary APIs are [active](#enable-apis) on the project. 
99 | 4. A working Dataflow Flex Template is uploaded to a GCS bucket. 
100 | 
101 | The [project factory](https://github.com/terraform-google-modules/terraform-google-project-factory) can be used to provision projects with the correct APIs active. 
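If you prefer not to use the project factory, the same APIs can also be enabled directly from Terraform with the standard Google provider's `google_project_service` resource. A minimal sketch, assuming a `var.project_id` variable is defined:

```hcl
# Enable the APIs this module depends on; see "Enable APIs" below.
resource "google_project_service" "required_apis" {
  for_each = toset([
    "dataflow.googleapis.com", # Dataflow jobs
    "compute.googleapis.com",  # worker VMs
  ])

  project            = var.project_id
  service            = each.value
  disable_on_destroy = false # keep the APIs enabled when this config is destroyed
}
```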
102 | 
103 | ### Software Dependencies 
104 | ### Terraform 
105 | - [Terraform](https://www.terraform.io/downloads.html) >= 1.3 
106 | - [terraform-provider-google-beta](https://github.com/terraform-providers/terraform-provider-google-beta) plugin >= 3.53, < 7 
107 | 
108 | ### Configure a Service Account to execute the module 
109 | 
110 | In order to execute this module you must have a Service Account with the 
111 | following project roles: 
112 | 
113 | - roles/dataflow.admin 
114 | - roles/iam.serviceAccountUser 
115 | - roles/storage.admin 
116 | 
117 | ### Configure a Controller Service Account to create the job 
118 | 
119 | If you want to use the service_account_email input to specify a service account that will identify the VMs in which the jobs are running, the service account will need the following project roles: 
120 | 
121 | - roles/dataflow.worker 
122 | - roles/storage.objectAdmin 
123 | 
124 | ### Configure a Customer Managed Encryption Key 
125 | 
126 | If you want to use [Customer Managed Encryption Keys](https://cloud.google.com/kms/docs/cmek) in the [Dataflow Job](https://cloud.google.com/dataflow/docs/guides/customer-managed-encryption-keys) use the variable `kms_key_name` to provide a valid key. 
127 | Follow the instructions in [Granting Encrypter/Decrypter permissions](https://cloud.google.com/dataflow/docs/guides/customer-managed-encryption-keys#granting_encrypterdecrypter_permissions) to configure the necessary roles for the Dataflow service accounts. 
128 | 
129 | ### Enable APIs 
130 | 
131 | In order to launch a Dataflow Job, the Dataflow API must be enabled: 
132 | 
133 | - Dataflow API - `dataflow.googleapis.com` 
134 | - Compute Engine API: `compute.googleapis.com` 
135 | 
136 | **Note:** If you want to use a Customer Managed Encryption Key, the Cloud Key Management Service (KMS) API must be enabled: 
137 | 
138 | - Cloud Key Management Service (KMS) API: `cloudkms.googleapis.com` 
139 | 
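Putting the CMEK pieces together, passing a key to this module might look like the following sketch; the job name, template path, key ring, and key names are placeholders:

```hcl
module "dataflow-flex-job" {
  source  = "terraform-google-modules/dataflow/google//modules/flex"
  version = "~> 3.0"

  project_id              = var.project_id
  name                    = "my-flex-job"                               # placeholder
  region                  = "us-central1"
  container_spec_gcs_path = "gs://my-bucket/templates/my-template.json" # placeholder

  # Key format as documented for the kms_key_name input:
  # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
  kms_key_name = "projects/${var.project_id}/locations/us-central1/keyRings/my-keyring/cryptoKeys/my-key" # placeholder
}
```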
140 | ## Install 
141 | 
142 | ### Terraform 
143 | Be sure you have the correct Terraform version (>= 1.3); you can choose the binary here: 
144 | - https://releases.hashicorp.com/terraform/ 
145 | 
146 | ## Testing 
147 | 
148 | ### Requirements 
149 | - [bundler](https://github.com/bundler/bundler) 
150 | - [gcloud](https://cloud.google.com/sdk/install) 
151 | - [terraform-docs](https://github.com/segmentio/terraform-docs/releases) 0.6.0 
152 | 
153 | ### Autogeneration of documentation from .tf files 
154 | Run 
155 | ``` 
156 | make generate_docs 
157 | ``` 
158 | 
159 | ### Integration test 
160 | 
161 | Integration tests are run through [test-kitchen](https://github.com/test-kitchen/test-kitchen), [kitchen-terraform](https://github.com/newcontext-oss/kitchen-terraform), and [InSpec](https://github.com/inspec/inspec). 
162 | 
163 | `test-kitchen` instances are defined in [`.kitchen.yml`](./.kitchen.yml). The test-kitchen instances in `test/fixtures/` wrap identically-named examples in the `examples/` directory. 
164 | 
165 | #### Setup 
166 | 
167 | 1. Configure the [test fixtures](#test-configuration) 
168 | 2. Download a Service Account key with the necessary permissions and put it in the module's root directory with the name `credentials.json`. 
169 | 3. Build the Docker container for testing: 
170 | 
171 | ``` 
172 | make docker_build_kitchen_terraform 
173 | ``` 
174 | 4. Run the testing container in interactive mode: 
175 | 
176 | ``` 
177 | make docker_run 
178 | ``` 
179 | 
180 | The module root directory will be loaded into the Docker container at `/cft/workdir/`. 
181 | 5. Run kitchen-terraform to test the infrastructure: 
182 | 
183 | 1. `kitchen create` creates Terraform state and downloads modules, if applicable. 
184 | 2. `kitchen converge` creates the underlying resources. Run `kitchen converge ` to create resources for a specific test case. 
185 | 3. `kitchen verify` tests the created infrastructure. Run `kitchen verify ` to run a specific test case. 
186 | 4. `kitchen destroy` tears down the underlying resources created by `kitchen converge`. Run `kitchen destroy ` to tear down resources for a specific test case. 
187 | 
188 | Alternatively, you can simply run `make test_integration_docker` to run all the test steps non-interactively. 
189 | 
190 | #### Test configuration 
191 | 
192 | Each test-kitchen instance is configured with a `variables.tfvars` file in the test fixture directory. For convenience, since all of the variables are project-specific, these files have been symlinked to `test/fixtures/shared/terraform.tfvars`. 
193 | Similarly, each test fixture has a `variables.tf` to define these variables, and an `outputs.tf` to facilitate providing necessary information for `inspec` to locate and query against created resources. 
194 | 
195 | Each test-kitchen instance creates necessary fixtures to house resources. 
196 | 
203 | ### Linting 
204 | The makefile in this project will lint or sometimes just format any shell, 
205 | Python, golang, Terraform, or Dockerfiles. The linters will only be run if 
206 | the makefile finds files with the appropriate file extension. 
207 | 
208 | All of the linter checks are in the default make target, so you just have to 
209 | run 
210 | 
211 | ``` 
212 | make -s 
213 | ``` 
214 | 
215 | The -s is for 'silent'. Successful output looks like this 
216 | 
217 | ``` 
218 | Running shellcheck 
219 | Running flake8 
220 | Running go fmt and go vet 
221 | Running terraform validate 
222 | Running hadolint on Dockerfiles 
223 | Checking for required files 
224 | Testing the validity of the header check 
225 | .. 
226 | ---------------------------------------------------------------------- 
227 | Ran 2 tests in 0.026s 
228 | 
229 | OK 
230 | Checking file headers 
231 | The following lines have trailing whitespace 
232 | ``` 
233 | 
234 | The linters 
235 | are as follows: 
236 | * Shell - shellcheck. Can be found in homebrew 
237 | * Python - flake8. Can be installed with 'pip install flake8' 
238 | * Golang - gofmt. gofmt comes with the standard golang installation. golang 
239 | is a compiled language so there is no standard linter. 
240 | * Terraform - terraform has a built-in linter in the 'terraform validate' 
241 | command. 
242 | * Dockerfiles - hadolint. Can be found in homebrew 
243 | 
-------------------------------------------------------------------------------- /modules/flex/main.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2019 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 16 | 17 | resource "google_dataflow_flex_template_job" "dataflow_job" { 18 | provider = google-beta 19 | 20 | project = var.project_id 21 | name = var.name 22 | container_spec_gcs_path = var.container_spec_gcs_path 23 | temp_location = var.temp_location 24 | on_delete = var.on_delete 25 | region = var.region 26 | max_workers = var.max_workers 27 | service_account_email = var.service_account_email 28 | network = var.network_name 29 | subnetwork = var.subnetwork 30 | machine_type = var.machine_type 31 | launcher_machine_type = var.launcher_machine_type 32 | sdk_container_image = var.sdk_container_image 33 | ip_configuration = var.use_public_ips ? "WORKER_IP_PUBLIC" : "WORKER_IP_PRIVATE" 34 | enable_streaming_engine = var.enable_streaming_engine 35 | autoscaling_algorithm = var.autoscaling_algorithm 36 | skip_wait_on_job_termination = var.skip_wait_on_job_termination 37 | kms_key_name = var.kms_key_name 38 | additional_experiments = var.additional_experiments 39 | parameters = var.parameters 40 | labels = var.labels 41 | } 42 | -------------------------------------------------------------------------------- /modules/flex/metadata.display.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: blueprints.cloud.google.com/v1alpha1 16 | kind: BlueprintMetadata 17 | metadata: 18 | name: terraform-google-dataflow-display 19 | annotations: 20 | config.kubernetes.io/local-config: "true" 21 | spec: 22 | info: 23 | title: Google Dataflow Flex Template Job Terraform Module 24 | source: 25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 26 | sourceType: git 27 | dir: /modules/flex 28 | ui: 29 | input: 30 | variables: 31 | additional_experiments: 32 | name: additional_experiments 33 | title: Additional Experiments 34 | autoscaling_algorithm: 35 | name: autoscaling_algorithm 36 | title: Autoscaling Algorithm 37 | container_spec_gcs_path: 38 | name: container_spec_gcs_path 39 | title: Container Spec Gcs Path 40 | enumValueLabels: 41 | - label: PUBSUB_TO_BIGQUERY_FLEX 42 | value: gs://adc-dataflow-templates/images/latest/flex/PubSub_to_BigQuery_Flex 43 | - label: PUBSUB_TO_GCS_TEXT_FLEX 44 | value: gs://adc-dataflow-templates/images/latest/flex/Cloud_PubSub_to_GCS_Text_Flex 45 | enable_streaming_engine: 46 | name: enable_streaming_engine 47 | title: Enable Streaming Engine 48 | kms_key_name: 49 | name: kms_key_name 50 | title: Kms Key Name 51 | labels: 52 | name: labels 53 | title: Labels 54 | launcher_machine_type: 55 | name: launcher_machine_type 56 | title: Launcher Machine Type 57 | machine_type: 58 | name: machine_type 59 | title: Machine Type 60 | max_workers: 61 | name: max_workers 62 | title: Max Workers 63 | name: 64 | name: name 65 | title: Name 66 | network_name: 67 | name: network_name 68 | title: Network Name 69 | on_delete: 70 | name: on_delete 71 | title: On Delete 72 | parameters: 73 | name: parameters 74 | title: Parameters 75 | project_id: 76 | name: project_id 77 | title: Project Id 78 | region: 79 | name: region 80 | title: Region 81 | sdk_container_image: 82 | name: sdk_container_image 83 | title: Sdk Container Image 84 | service_account_email: 85 | name: service_account_email 86 | title: Service Account Email 87 | skip_wait_on_job_termination: 88 | name: skip_wait_on_job_termination 89 | title: Skip Wait On Job Termination 90 | subnetwork: 91 | name: subnetwork 92 | title: Subnetwork 93 | temp_location: 94 | name: temp_location 95 | title: Temp Location 96 | use_public_ips: 97 | name: use_public_ips 98 | title: Use Public Ips 99 | level: 1 100 | altDefaults: 101 | - type: ALTERNATE_TYPE_DC 102 | value: true 103 | 104 | -------------------------------------------------------------------------------- /modules/flex/metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | apiVersion: blueprints.cloud.google.com/v1alpha1 
16 | kind: BlueprintMetadata 
17 | metadata: 
18 | name: terraform-google-dataflow 
19 | annotations: 
20 | config.kubernetes.io/local-config: "true" 
21 | spec: 
22 | info: 
23 | title: Google Dataflow Flex Template Job Terraform Module 
24 | source: 
25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 
26 | sourceType: git 
27 | dir: /modules/flex 
28 | version: 3.0.2 
29 | actuationTool: 
30 | flavor: Terraform 
31 | version: ">= 1.3" 
32 | description: {} 
33 | content: 
34 | examples: 
35 | - name: dlp_api_example 
36 | location: examples/dlp_api_example 
37 | - name: simple_example 
38 | location: examples/simple_example 
39 | interfaces: 
40 | variables: 
41 | - name: project_id 
42 | description: The project in which the resource belongs. If it is not provided, the provider project is used. 
43 | varType: string 
44 | required: true 
45 | - name: name 
46 | description: The name of the dataflow job 
47 | varType: string 
48 | required: true 
49 | - name: container_spec_gcs_path 
50 | description: The GCS path to the Dataflow job Flex Template. 
51 | varType: string 
52 | required: true 
53 | - name: temp_location 
54 | description: The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://. 
55 | varType: string 
56 | - name: on_delete 
57 | description: One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel. 
58 | varType: string 
59 | defaultValue: cancel 
60 | - name: region 
61 | description: The region in which the created job should run. Also determines the location of the staging bucket if created. 
62 | varType: string 
63 | defaultValue: us-central1 
64 | - name: max_workers 
65 | description: " The number of workers permitted to work on the job. More workers may improve processing speed at additional cost." 
66 | varType: number 
67 | defaultValue: 1 
68 | - name: service_account_email 
69 | description: The Service Account email that will be used to identify the VMs in which the jobs are running 
70 | varType: string 
71 | defaultValue: "" 
72 | connections: 
73 | - source: 
74 | source: github.com/terraform-google-modules/terraform-google-service-accounts//modules/simple-sa 
75 | version: ">= 4.3" 
76 | spec: 
77 | outputExpr: email 
78 | - name: subnetwork 
79 | description: The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'. 
80 | varType: string 
81 | defaultValue: "" 
82 | - name: network_name 
83 | description: The network to which VMs will be assigned. 
84 | varType: string 
85 | defaultValue: default 
86 | - name: launcher_machine_type 
87 | description: The machine type to use for launching the job. 
88 | varType: string 
89 | defaultValue: "" 
90 | - name: machine_type 
91 | description: The machine type to use for the job. 
92 | varType: string 
93 | defaultValue: "" 
94 | - name: sdk_container_image 
95 | description: Docker registry location of container image to use for the worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable pipelines. 
96 | varType: string 
97 | - name: use_public_ips 
98 | description: Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication. 
99 | varType: bool 
100 | defaultValue: false 
101 | - name: enable_streaming_engine 
102 | description: Enable/disable the use of Streaming Engine for the job. 
103 | varType: bool 104 | defaultValue: false 105 | - name: autoscaling_algorithm 106 | description: The algorithm to use for autoscaling. 107 | varType: string 108 | - name: skip_wait_on_job_termination 109 | description: If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on. 110 | varType: bool 111 | defaultValue: false 112 | - name: kms_key_name 113 | description: "The name for the Cloud KMS key for the job. Key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY" 114 | varType: string 115 | - name: additional_experiments 116 | description: List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']` 117 | varType: list(string) 118 | defaultValue: [] 119 | - name: parameters 120 | description: Key/Value pairs to be passed to the Dataflow job (as used in the template). 121 | varType: map(string) 122 | defaultValue: {} 123 | connections: 124 | - source: 125 | source: github.com/terraform-google-modules/terraform-google-bigquery 126 | version: ">= 10.0.3" 127 | spec: 128 | outputExpr: "{\"OutputTableSpec\": table_fqns[0]}" 129 | - source: 130 | source: github.com/terraform-google-modules/terraform-google-pubsub 131 | version: ">= 8.0.1" 132 | spec: 133 | outputExpr: "{\"InputTopic\": id}" 134 | - source: 135 | source: github.com/terraform-google-modules/terraform-google-cloud-storage//modules/simple_bucket 136 | version: ">= 9.1.0" 137 | spec: 138 | outputExpr: "{\"OutputDirectory\": url, \"OutputFilenamePrefix\": \"adc-dataflow\"}" 139 | - name: labels 140 | description: User labels to be specified for the job. 141 | varType: map(string) 142 | defaultValue: {} 143 | outputs: 144 | - name: container_spec_gcs_path 145 | description: The GCS path to the Dataflow job Flex Template. 146 | type: string 147 | - name: id 148 | description: The unique Id of the newly created Dataflow job 149 | type: string 150 | - name: name 151 | description: The name of the dataflow job 152 | type: string 153 | - name: state 154 | description: The state of the newly created Dataflow job 155 | type: string 156 | - name: temp_location 157 | description: The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://. 158 | type: string 159 | requirements: 160 | roles: 161 | - level: Project 162 | roles: 163 | - roles/dataflow.admin 164 | - roles/dataflow.worker 165 | - roles/storage.admin 166 | - roles/compute.networkAdmin 167 | - roles/pubsub.editor 168 | - roles/bigquery.dataEditor 169 | - roles/artifactregistry.writer 170 | - roles/iam.serviceAccountUser 171 | - roles/resourcemanager.projectIamAdmin 172 | services: 173 | - cloudresourcemanager.googleapis.com 174 | - storage-api.googleapis.com 175 | - serviceusage.googleapis.com 176 | - dataflow.googleapis.com 177 | providerVersions: 178 | - source: hashicorp/google-beta 179 | version: ">= 3.53, < 7" 180 | -------------------------------------------------------------------------------- /modules/flex/outputs.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | output "name" { 18 | description = "The name of the dataflow job" 19 | value = google_dataflow_flex_template_job.dataflow_job.name 20 | } 21 | 22 | output "container_spec_gcs_path" { 23 | description = "The GCS path to the Dataflow job Flex Template." 24 | value = google_dataflow_flex_template_job.dataflow_job.container_spec_gcs_path 25 | } 26 | 27 | output "temp_location" { 28 | description = "The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://." 29 | value = google_dataflow_flex_template_job.dataflow_job.temp_location 30 | } 31 | 32 | output "state" { 33 | description = "The state of the newly created Dataflow job" 34 | value = google_dataflow_flex_template_job.dataflow_job.state 35 | } 36 | 37 | output "id" { 38 | description = "The unique Id of the newly created Dataflow job" 39 | value = google_dataflow_flex_template_job.dataflow_job.id 40 | } 41 | -------------------------------------------------------------------------------- /modules/flex/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | variable "project_id" { 17 | type = string 18 | description = "The project in which the resource belongs. If it is not provided, the provider project is used." 19 | } 20 | 21 | variable "name" { 22 | type = string 23 | description = "The name of the dataflow job" 24 | } 25 | 26 | variable "container_spec_gcs_path" { 27 | type = string 28 | description = "The GCS path to the Dataflow job Flex Template." 29 | } 30 | 31 | variable "temp_location" { 32 | type = string 33 | description = "The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://." 34 | default = null 35 | } 36 | 37 | variable "on_delete" { 38 | type = string 39 | description = "One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel." 40 | default = "cancel" 41 | } 42 | 43 | variable "region" { 44 | type = string 45 | description = "The region in which the created job should run. Also determines the location of the staging bucket if created." 46 | default = "us-central1" 47 | } 48 | 49 | variable "max_workers" { 50 | type = number 51 | description = " The number of workers permitted to work on the job. More workers may improve processing speed at additional cost." 
52 | default = 1 
53 | } 
54 | 
55 | variable "service_account_email" { 
56 | type = string 
57 | description = "The Service Account email that will be used to identify the VMs in which the jobs are running" 
58 | default = "" 
59 | } 
60 | 
61 | variable "subnetwork" { 
62 | type = string 
63 | description = "The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'." 
64 | default = "" 
65 | } 
66 | 
67 | variable "network_name" { 
68 | type = string 
69 | description = "The network to which VMs will be assigned." 
70 | default = "default" 
71 | } 
72 | 
73 | variable "launcher_machine_type" { 
74 | type = string 
75 | description = "The machine type to use for launching the job." 
76 | default = "" 
77 | } 
78 | 
79 | variable "machine_type" { 
80 | type = string 
81 | description = "The machine type to use for the job." 
82 | default = "" 
83 | } 
84 | 
85 | variable "sdk_container_image" { 
86 | type = string 
87 | description = "Docker registry location of container image to use for the worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable pipelines." 
88 | default = null 
89 | } 
90 | 
91 | variable "use_public_ips" { 
92 | type = bool 
93 | description = "Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication." 
94 | default = false 
95 | } 
96 | 
97 | variable "enable_streaming_engine" { 
98 | type = bool 
99 | description = "Enable/disable the use of Streaming Engine for the job." 
100 | default = false 
101 | } 
102 | 
103 | variable "autoscaling_algorithm" { 
104 | type = string 
105 | description = "The algorithm to use for autoscaling." 
106 | default = null 
107 | } 
108 | 
109 | variable "skip_wait_on_job_termination" { 
110 | type = bool 
111 | description = "If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on." 
112 | default = false 
113 | } 
114 | 
115 | variable "kms_key_name" { 
116 | type = string 
117 | description = "The name for the Cloud KMS key for the job. Key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY" 
118 | default = null 
119 | } 
120 | 
121 | variable "additional_experiments" { 
122 | type = list(string) 
123 | description = "List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']`" 
124 | default = [] 
125 | } 
126 | 
127 | variable "parameters" { 
128 | type = map(string) 
129 | description = "Key/Value pairs to be passed to the Dataflow job (as used in the template)." 
130 | default = {} 
131 | } 
132 | 
133 | variable "labels" { 
134 | type = map(string) 
135 | description = "User labels to be specified for the job." 
136 | default = {} 
137 | } 
138 | 
-------------------------------------------------------------------------------- /modules/flex/versions.tf: -------------------------------------------------------------------------------- 
1 | /** 
2 | * Copyright 2021 Google LLC 
3 | * 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 
5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 
7 | * 
8 | * http://www.apache.org/licenses/LICENSE-2.0 
9 | * 
10 | * Unless required by applicable law or agreed to in writing, software 
11 | * distributed under the License is distributed on an "AS IS" BASIS, 
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 
14 | * limitations under the License. 
15 | */ 
16 | 
17 | terraform { 
18 | required_version = ">= 1.3" 
19 | required_providers { 
20 | 
21 | google-beta = { 
22 | source = "hashicorp/google-beta" 
23 | version = ">= 3.53, < 7" 
24 | } 
25 | } 
26 | 
27 | provider_meta "google-beta" { 
28 | module_name = "blueprints/terraform/terraform-google-dataflow:flex/v3.0.2" 
29 | } 
30 | 
31 | } 
32 | 
33 | 
-------------------------------------------------------------------------------- /modules/legacy/README.md: -------------------------------------------------------------------------------- 
1 | # [Google Dataflow Terraform Module](https://registry.terraform.io/modules/terraform-google-modules/dataflow/google) 
2 | 
3 | This module handles opinionated Dataflow job configuration and deployments. 
4 | 
5 | The resources/services/activations/deletions that this module will create/trigger are: 
6 | - Create a GCS bucket for temporary job data 
7 | - Create a Dataflow job 
8 | 
9 | ## Compatibility 
10 | This module is meant for use with Terraform 0.13+ and tested using Terraform 1.0+. If you find incompatibilities using Terraform >=0.13, please open an issue. 
11 | If you haven't 
12 | [upgraded](https://www.terraform.io/upgrade-guides/0-13.html) and need a Terraform 
13 | 0.12.x-compatible version of this module, the last released version 
14 | intended for Terraform 0.12.x is [v1.0.0](https://registry.terraform.io/modules/terraform-google-modules/dataflow/google/v1.0.0). 
15 | 
16 | ## Usage 
17 | 
18 | Before using this module, one should get familiar with the `google_dataflow_job`’s [Notes on “destroy”/”apply”](https://www.terraform.io/docs/providers/google/r/dataflow_job.html#note-on-quot-destroy-quot-quot-apply-quot-) as the behavior is atypical when compared to other resources. 
19 | 
20 | ### Assumption 
21 | The module is made to be used with the template_gcs_path as the staging location. 
22 | Hence, one assumption is that, before using this module, you already have working Dataflow job template(s) in GCS staging location(s). 
23 | 
24 | There are examples included in the [examples](../../examples/) folder, but simple usage is as follows: 
25 | 
26 | ```hcl 
27 | module "dataflow-job" { 
28 | source = "terraform-google-modules/dataflow/google//modules/legacy" 
29 | version = "~> 3.0" 
30 | 
31 | project_id = "" 
32 | name = "" 
33 | on_delete = "cancel" 
34 | region = "us-central1" 
35 | max_workers = 1 
36 | template_gcs_path = "gs://" 
37 | temp_gcs_location = "gs://" 
38 | } 
39 | ``` 
40 | 
54 | ## Inputs 
55 | 
56 | | Name | Description | Type | Default | Required | 
57 | |------|-------------|------|---------|:--------:| 
58 | | additional\_experiments | List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']` | `list(string)` | `[]` | no | 
59 | | enable\_streaming\_engine | Enable/disable the use of Streaming Engine for the job. | `bool` | `false` | no | 
60 | | kms\_key\_name | The name for the Cloud KMS key for the job. Key format is: projects/PROJECT\_ID/locations/LOCATION/keyRings/KEY\_RING/cryptoKeys/KEY | `string` | `null` | no | 
61 | | labels | User labels to be specified for the job. | `map(string)` | `{}` | no | 
62 | | machine\_type | The machine type to use for the job. | `string` | `""` | no | 
63 | | max\_workers | The number of workers permitted to work on the job. More workers may improve processing speed at additional cost. | `number` | `1` | no | 
64 | | name | The name of the dataflow job | `string` | n/a | yes | 
65 | | network\_name | The network to which VMs will be assigned. | `string` | `"default"` | no | 
66 | | on\_delete | One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel. | `string` | `"cancel"` | no | 
67 | | parameters | Key/Value pairs to be passed to the Dataflow job (as used in the template). | `map(string)` | `{}` | no | 
68 | | project\_id | The project in which the resource belongs. If it is not provided, the provider project is used. | `string` | n/a | yes | 
69 | | region | The region in which the created job should run. Also determines the location of the staging bucket if created. | `string` | `"us-central1"` | no | 
70 | | service\_account\_email | The Service Account email that will be used to identify the VMs in which the jobs are running | `string` | `""` | no | 
71 | | skip\_wait\_on\_job\_termination | If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on. | `bool` | `false` | no | 
72 | | subnetwork | The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'. | `string` | `""` | no | 
73 | | temp\_gcs\_location | A writeable location on GCS for the Dataflow job to dump its temporary data. It will be used to form the temp location path for the job, 'gs://TEMP\_GCS\_LOCATION/tmp\_dir'. | `string` | n/a | yes | 
74 | | template\_gcs\_path | The GCS path to the Dataflow job template. | `string` | n/a | yes | 
75 | | use\_public\_ips | Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication. | `bool` | `false` | no | 
76 | 
77 | ## Outputs 
78 | 
79 | | Name | Description | 
80 | |------|-------------| 
81 | | id | The unique Id of the newly created Dataflow job | 
82 | | name | The name of the dataflow job | 
83 | | state | The state of the newly created Dataflow job | 
84 | | temp\_gcs\_location | The GCS path for the Dataflow job's temporary data. | 
85 | | template\_gcs\_path | The GCS path to the Dataflow job template. | 
86 | 
87 | 
88 | 
89 | ## Requirements 
90 | 
91 | Before this module can be used on a project, you must ensure that the following pre-requisites are fulfilled: 
92 | 
93 | 1. Terraform is [installed](#software-dependencies) on the machine where Terraform is executed. 
94 | 2. The Service Account you execute the module with has the right [permissions](#configure-a-service-account). 
95 | 3. The necessary APIs are [active](#enable-apis) on the project. 
96 | 4. A working Dataflow template is uploaded to a GCS bucket. 
97 | 
98 | The [project factory](https://github.com/terraform-google-modules/terraform-google-project-factory) can be used to provision projects with the correct APIs active. 
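As a concrete sketch of these prerequisites in use, the bundled [simple_example](../../examples/simple_example) pairs the `dataflow_bucket` submodule with Google's public `Word_Count` template; module and job names below are placeholders:

```hcl
module "dataflow-bucket" {
  source  = "terraform-google-modules/dataflow/google//modules/dataflow_bucket"
  version = "~> 3.0"

  name       = "my-dataflow-tmp-bucket" # placeholder
  region     = var.region
  project_id = var.project_id
}

module "dataflow-job" {
  source  = "terraform-google-modules/dataflow/google//modules/legacy"
  version = "~> 3.0"

  project_id        = var.project_id
  name              = "wordcount-example" # placeholder
  on_delete         = "cancel"
  region            = var.region
  max_workers       = 1
  template_gcs_path = "gs://dataflow-templates/latest/Word_Count" # public Google-provided template
  temp_gcs_location = module.dataflow-bucket.name                # bucket name only; the module appends /tmp_dir
}
```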
99 | 100 | ### Software Dependencies 101 | #### Terraform 102 | - [Terraform](https://www.terraform.io/downloads.html) >= 0.13.0 103 | - [terraform-provider-google](https://github.com/terraform-providers/terraform-provider-google) plugin >= v3.53 104 | 105 | ### Configure a Service Account to execute the module 106 | 107 | To execute this module, you must have a Service Account with the 108 | following project roles: 109 | 110 | - roles/dataflow.admin 111 | - roles/iam.serviceAccountUser 112 | - roles/storage.admin 113 | 114 | ### Configure a Controller Service Account to create the job 115 | 116 | If you want to use the `service_account_email` input to specify a service account that will identify the VMs in which the jobs are running, that service account will need the following project roles: 117 | 118 | - roles/dataflow.worker 119 | - roles/storage.objectAdmin 120 | 121 | ### Configure a Customer Managed Encryption Key 122 | 123 | If you want to use [Customer Managed Encryption Keys](https://cloud.google.com/kms/docs/cmek) in the [Dataflow Job](https://cloud.google.com/dataflow/docs/guides/customer-managed-encryption-keys), use the variable `kms_key_name` to provide a valid key. 124 | Follow the instructions in [Granting Encrypter/Decrypter permissions](https://cloud.google.com/dataflow/docs/guides/customer-managed-encryption-keys#granting_encrypterdecrypter_permissions) to configure the necessary roles for the Dataflow service accounts. 125 | 126 | ### Enable APIs 127 | 128 | In order to launch a Dataflow Job, the following APIs must be enabled: 129 | 130 | - Dataflow API: `dataflow.googleapis.com` 131 | - Compute Engine API: `compute.googleapis.com` 132 | 133 | **Note:** If you want to use a Customer Managed Encryption Key, the Cloud Key Management Service (KMS) API must also be enabled: 134 | 135 | - Cloud Key Management Service (KMS) API: `cloudkms.googleapis.com` 136 | 137 | ## Install 138 | 139 | ### Terraform 140 | Be sure you have the correct Terraform version (0.13+); you can choose the binary here: 141 | - https://releases.hashicorp.com/terraform/ 142 | 143 | ## Testing 144 | 145 | ### Requirements 146 | - [bundler](https://github.com/bundler/bundler) 147 | - [gcloud](https://cloud.google.com/sdk/install) 148 | - [terraform-docs](https://github.com/segmentio/terraform-docs/releases) 0.6.0 149 | 150 | ### Autogeneration of documentation from .tf files 151 | Run 152 | ``` 153 | make generate_docs 154 | ``` 155 | 156 | ### Integration test 157 | 158 | Integration tests are run through [test-kitchen](https://github.com/test-kitchen/test-kitchen), [kitchen-terraform](https://github.com/newcontext-oss/kitchen-terraform), and [InSpec](https://github.com/inspec/inspec). 159 | 160 | `test-kitchen` instances are defined in [`.kitchen.yml`](./.kitchen.yml). The test-kitchen instances in `test/fixtures/` wrap identically-named examples in the `examples/` directory. 161 | 162 | #### Setup 163 | 164 | 1. Configure the [test fixtures](#test-configuration) 165 | 2. Download a Service Account key with the necessary permissions and put it in the module's root directory with the name `credentials.json`. 166 | 3. Build the Docker container for testing: 167 | 168 | ``` 169 | make docker_build_kitchen_terraform 170 | ``` 171 | 4. Run the testing container in interactive mode: 172 | 173 | ``` 174 | make docker_run 175 | ``` 176 | 177 | The module root directory will be loaded into the Docker container at `/cft/workdir/`. 178 | 5. Run kitchen-terraform to test the infrastructure: 179 | 180 | 1. 
`kitchen create` creates Terraform state and downloads modules, if applicable. 181 | 2. `kitchen converge` creates the underlying resources. Run `kitchen converge <INSTANCE_NAME>` to create resources for a specific test case. 182 | 3. `kitchen verify` tests the created infrastructure. Run `kitchen verify <INSTANCE_NAME>` to run a specific test case. 183 | 4. `kitchen destroy` tears down the underlying resources created by `kitchen converge`. Run `kitchen destroy <INSTANCE_NAME>` to tear down resources for a specific test case. 184 | 185 | Alternatively, you can run `make test_integration_docker` to run all the test steps non-interactively. 186 | 187 | #### Test configuration 188 | 189 | Each test-kitchen instance is configured with a `variables.tfvars` file in the test fixture directory. For convenience, since all of the variables are project-specific, these files have been symlinked to `test/fixtures/shared/terraform.tfvars`. 190 | Similarly, each test fixture has a `variables.tf` to define these variables, and an `outputs.tf` to facilitate providing the necessary information for `inspec` to locate and query the created resources. 191 | 192 | Each test-kitchen instance creates the necessary fixtures to house resources. 193 | 200 | ### Linting 201 | The makefile in this project will lint (or, in some cases, just format) any shell, 202 | Python, golang, Terraform, or Dockerfiles. The linters will only be run if 203 | the makefile finds files with the appropriate file extension. 204 | 205 | All of the linter checks are in the default make target, so you just have to 206 | run 207 | 208 | ``` 209 | make -s 210 | ``` 211 | 212 | The `-s` is for 'silent'. Successful output looks like this: 213 | 214 | ``` 215 | Running shellcheck 216 | Running flake8 217 | Running go fmt and go vet 218 | Running terraform validate 219 | Running hadolint on Dockerfiles 220 | Checking for required files 221 | Testing the validity of the header check 222 | .. 223 | ---------------------------------------------------------------------- 224 | Ran 2 tests in 0.026s 225 | 226 | OK 227 | Checking file headers 228 | The following lines have trailing whitespace 229 | ``` 230 | 231 | The linters are as follows: 233 | * Shell - shellcheck. Can be found in homebrew 234 | * Python - flake8. Can be installed with 'pip install flake8' 235 | * Golang - gofmt. gofmt comes with the standard golang installation. golang 236 | is a compiled language so there is no standard linter. 237 | * Terraform - terraform has a built-in linter in the 'terraform validate' 238 | command. 239 | * Dockerfiles - hadolint. Can be found in homebrew 240 | -------------------------------------------------------------------------------- /modules/legacy/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "google_dataflow_job" "dataflow_job" { 18 | project = var.project_id 19 | name = var.name 20 | template_gcs_path = var.template_gcs_path 21 | temp_gcs_location = "gs://${var.temp_gcs_location}/tmp_dir" 22 | on_delete = var.on_delete 23 | region = var.region 24 | max_workers = var.max_workers 25 | parameters = var.parameters 26 | service_account_email = var.service_account_email 27 | network = var.network_name 28 | subnetwork = var.subnetwork 29 | machine_type = var.machine_type 30 | ip_configuration = var.use_public_ips ? "WORKER_IP_PUBLIC" : "WORKER_IP_PRIVATE" 31 | enable_streaming_engine = var.enable_streaming_engine 32 | skip_wait_on_job_termination = var.skip_wait_on_job_termination 33 | additional_experiments = var.additional_experiments 34 | kms_key_name = var.kms_key_name 35 | labels = var.labels 36 | } 37 | 38 | -------------------------------------------------------------------------------- /modules/legacy/metadata.display.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: blueprints.cloud.google.com/v1alpha1 16 | kind: BlueprintMetadata 17 | metadata: 18 | name: terraform-google-dataflow-display 19 | annotations: 20 | config.kubernetes.io/local-config: "true" 21 | spec: 22 | info: 23 | title: Google Dataflow Terraform Module 24 | source: 25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 26 | sourceType: git 27 | dir: /modules/legacy 28 | ui: 29 | input: 30 | variables: 31 | additional_experiments: 32 | name: additional_experiments 33 | title: Additional Experiments 34 | enable_streaming_engine: 35 | name: enable_streaming_engine 36 | title: Enable Streaming Engine 37 | kms_key_name: 38 | name: kms_key_name 39 | title: Kms Key Name 40 | labels: 41 | name: labels 42 | title: Labels 43 | machine_type: 44 | name: machine_type 45 | title: Machine Type 46 | max_workers: 47 | name: max_workers 48 | title: Max Workers 49 | name: 50 | name: name 51 | title: Name 52 | network_name: 53 | name: network_name 54 | title: Network Name 55 | on_delete: 56 | name: on_delete 57 | title: On Delete 58 | parameters: 59 | name: parameters 60 | title: Parameters 61 | project_id: 62 | name: project_id 63 | title: Project Id 64 | region: 65 | name: region 66 | title: Region 67 | service_account_email: 68 | name: service_account_email 69 | title: Service Account Email 70 | skip_wait_on_job_termination: 71 | name: skip_wait_on_job_termination 72 | title: Skip Wait On Job Termination 73 | subnetwork: 74 | name: subnetwork 75 | title: Subnetwork 76 | temp_gcs_location: 77 | name: temp_gcs_location 78 | title: Temp Gcs Location 79 | template_gcs_path: 80 | name: template_gcs_path 81 | title: Template Gcs Path 82 | use_public_ips: 83 | name: use_public_ips 84 | title: Use Public Ips 85 | -------------------------------------------------------------------------------- 
/modules/legacy/metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: blueprints.cloud.google.com/v1alpha1 16 | kind: BlueprintMetadata 17 | metadata: 18 | name: terraform-google-dataflow 19 | annotations: 20 | config.kubernetes.io/local-config: "true" 21 | spec: 22 | info: 23 | title: Google Dataflow Terraform Module 24 | source: 25 | repo: https://github.com/terraform-google-modules/terraform-google-dataflow.git 26 | sourceType: git 27 | dir: /modules/legacy 28 | version: 3.0.2 29 | actuationTool: 30 | flavor: Terraform 31 | version: ">= 0.13" 32 | description: {} 33 | content: 34 | examples: 35 | - name: dlp_api_example 36 | location: examples/dlp_api_example 37 | - name: simple_example 38 | location: examples/simple_example 39 | interfaces: 40 | variables: 41 | - name: project_id 42 | description: The project in which the resource belongs. If it is not provided, the provider project is used. 43 | varType: string 44 | required: true 45 | - name: name 46 | description: The name of the dataflow job 47 | varType: string 48 | required: true 49 | - name: template_gcs_path 50 | description: The GCS path to the Dataflow job template. 51 | varType: string 52 | required: true 53 | - name: temp_gcs_location 54 | description: A writeable location on GCS for the Dataflow job to dump its temporary data. It will be used to form the temp location path for the job, 'gs://TEMP_GCS_LOCATION/tmp_dir'. 55 | varType: string 56 | required: true 57 | connections: 58 | - source: 59 | source: github.com/terraform-google-modules/terraform-google-cloud-storage//modules/simple_bucket 60 | version: ">= 9.0.1" 61 | spec: 62 | outputExpr: url 63 | - name: on_delete 64 | description: One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel. 65 | varType: string 66 | defaultValue: cancel 67 | - name: region 68 | description: The region in which the created job should run. Also determines the location of the staging bucket if created. 69 | varType: string 70 | defaultValue: us-central1 71 | - name: max_workers 72 | description: " The number of workers permitted to work on the job. More workers may improve processing speed at additional cost." 73 | varType: number 74 | defaultValue: 1 75 | - name: service_account_email 76 | description: The Service Account email that will be used to identify the VMs in which the jobs are running 77 | varType: string 78 | defaultValue: "" 79 | connections: 80 | - source: 81 | source: github.com/terraform-google-modules/terraform-google-service-accounts//modules/simple-sa 82 | version: ~> 4.3 83 | spec: 84 | outputExpr: email 85 | - name: subnetwork 86 | description: The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'. 
87 | varType: string 88 | defaultValue: "" 89 | - name: network_name 90 | description: The network to which VMs will be assigned. 91 | varType: string 92 | defaultValue: default 93 | - name: machine_type 94 | description: The machine type to use for the job. 95 | varType: string 96 | defaultValue: "" 97 | - name: use_public_ips 98 | description: Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication. 99 | varType: bool 100 | defaultValue: false 101 | - name: enable_streaming_engine 102 | description: Enable/disable the use of Streaming Engine for the job. 103 | varType: bool 104 | defaultValue: false 105 | - name: skip_wait_on_job_termination 106 | description: If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on. 107 | varType: bool 108 | defaultValue: false 109 | - name: kms_key_name 110 | description: "The name for the Cloud KMS key for the job. Key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY" 111 | varType: string 112 | - name: additional_experiments 113 | description: List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']` 114 | varType: list(string) 115 | defaultValue: [] 116 | - name: parameters 117 | description: Key/Value pairs to be passed to the Dataflow job (as used in the template). 118 | varType: map(string) 119 | defaultValue: {} 120 | - name: labels 121 | description: User labels to be specified for the job. 122 | varType: map(string) 123 | defaultValue: {} 124 | outputs: 125 | - name: id 126 | description: The unique Id of the newly created Dataflow job 127 | type: string 128 | - name: name 129 | description: The name of the dataflow job 130 | type: string 131 | - name: state 132 | description: The state of the newly created Dataflow job 133 | type: string 134 | - name: temp_gcs_location 135 | description: The GCS path for the Dataflow job's temporary data. 136 | type: string 137 | - name: template_gcs_path 138 | description: The GCS path to the Dataflow job template. 139 | type: string 140 | requirements: 141 | roles: 142 | - level: Project 143 | roles: 144 | - roles/dataflow.admin 145 | - roles/dataflow.worker 146 | - roles/storage.admin 147 | - roles/compute.networkAdmin 148 | - roles/pubsub.editor 149 | - roles/bigquery.dataEditor 150 | - roles/artifactregistry.writer 151 | - roles/iam.serviceAccountUser 152 | - roles/resourcemanager.projectIamAdmin 153 | services: 154 | - cloudresourcemanager.googleapis.com 155 | - storage-api.googleapis.com 156 | - serviceusage.googleapis.com 157 | - dataflow.googleapis.com 158 | providerVersions: 159 | - source: hashicorp/google 160 | version: ">= 3.53, < 7" 161 | -------------------------------------------------------------------------------- /modules/legacy/outputs.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | output "name" { 18 | description = "The name of the dataflow job" 19 | value = google_dataflow_job.dataflow_job.name 20 | } 21 | 22 | output "template_gcs_path" { 23 | description = "The GCS path to the Dataflow job template." 24 | value = google_dataflow_job.dataflow_job.template_gcs_path 25 | } 26 | 27 | output "temp_gcs_location" { 28 | description = "The GCS path for the Dataflow job's temporary data." 29 | value = google_dataflow_job.dataflow_job.temp_gcs_location 30 | } 31 | 32 | output "state" { 33 | description = "The state of the newly created Dataflow job" 34 | value = google_dataflow_job.dataflow_job.state 35 | } 36 | 37 | output "id" { 38 | description = "The unique Id of the newly created Dataflow job" 39 | value = google_dataflow_job.dataflow_job.id 40 | } 41 | 42 | -------------------------------------------------------------------------------- /modules/legacy/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | variable "project_id" { 17 | type = string 18 | description = "The project in which the resource belongs. If it is not provided, the provider project is used." 19 | } 20 | 21 | variable "name" { 22 | type = string 23 | description = "The name of the dataflow job" 24 | } 25 | 26 | variable "template_gcs_path" { 27 | type = string 28 | description = "The GCS path to the Dataflow job template." 29 | } 30 | 31 | variable "temp_gcs_location" { 32 | type = string 33 | description = "A writeable location on GCS for the Dataflow job to dump its temporary data. It will be used to form the temp location path for the job, 'gs://TEMP_GCS_LOCATION/tmp_dir'." 34 | } 35 | 36 | variable "on_delete" { 37 | type = string 38 | description = "One of drain or cancel. Specifies behavior of deletion during terraform destroy. The default is cancel." 39 | default = "cancel" 40 | } 41 | 42 | variable "region" { 43 | type = string 44 | description = "The region in which the created job should run. Also determines the location of the staging bucket if created." 45 | default = "us-central1" 46 | } 47 | 48 | variable "max_workers" { 49 | type = number 50 | description = " The number of workers permitted to work on the job. More workers may improve processing speed at additional cost." 
51 | default = 1 52 | } 53 | 54 | variable "service_account_email" { 55 | type = string 56 | description = "The Service Account email that will be used to identify the VMs in which the jobs are running" 57 | default = "" 58 | } 59 | 60 | variable "subnetwork" { 61 | type = string 62 | description = "The subnetwork to which VMs will be assigned. If provided, it should be of the form of 'regions/REGION/subnetworks/SUBNETWORK'." 63 | default = "" 64 | } 65 | 66 | variable "network_name" { 67 | type = string 68 | description = "The network to which VMs will be assigned." 69 | default = "default" 70 | } 71 | 72 | variable "machine_type" { 73 | type = string 74 | description = "The machine type to use for the job." 75 | default = "" 76 | } 77 | 78 | variable "use_public_ips" { 79 | type = bool 80 | description = "Specifies whether Dataflow workers use external IP addresses. If the value is set to false, Dataflow workers use internal IP addresses for all communication." 81 | default = false 82 | } 83 | 84 | variable "enable_streaming_engine" { 85 | type = bool 86 | description = "Enable/disable the use of Streaming Engine for the job." 87 | default = false 88 | } 89 | 90 | variable "skip_wait_on_job_termination" { 91 | type = bool 92 | description = "If set to true, terraform will treat DRAINING and CANCELLING as terminal states when deleting the resource, and will remove the resource from terraform state and move on." 93 | default = false 94 | } 95 | 96 | variable "kms_key_name" { 97 | type = string 98 | description = "The name for the Cloud KMS key for the job. Key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY" 99 | default = null 100 | } 101 | 102 | variable "additional_experiments" { 103 | type = list(string) 104 | description = "List of experiments that should be used by the job. An example value is `['enable_stackdriver_agent_metrics']`" 105 | default = [] 106 | } 107 | 108 | variable "parameters" { 109 | type = map(string) 110 | description = "Key/Value pairs to be passed to the Dataflow job (as used in the template)." 111 | default = {} 112 | } 113 | 114 | variable "labels" { 115 | type = map(string) 116 | description = "User labels to be specified for the job." 117 | default = {} 118 | } 119 | -------------------------------------------------------------------------------- /modules/legacy/versions.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | terraform { 18 | required_version = ">= 0.13" 19 | required_providers { 20 | 21 | google = { 22 | source = "hashicorp/google" 23 | version = ">= 3.53, < 7" 24 | } 25 | } 26 | 27 | provider_meta "google" { 28 | module_name = "blueprints/terraform/terraform-google-dataflow/v3.0.2" 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | source.sh 2 | -------------------------------------------------------------------------------- /test/fixtures/simple_example/README.md: -------------------------------------------------------------------------------- 1 | # Simple Example 2 | 3 | This example illustrates how to use the `dataflow` module. 4 | 31 | To provision this example, run the following from within this directory: 32 | - `terraform init` to get the plugins 33 | - `terraform plan` to see the infrastructure plan 34 | - `terraform apply` to apply the infrastructure build 35 | - `terraform destroy` to destroy the built infrastructure 36 | 37 | 38 | ## Inputs 39 | 40 | | Name | Description | Type | Default | Required | 41 | |------|-------------|------|---------|:--------:| 42 | | project\_id | The project\_id to deploy the example instance into. (e.g. "simple-sample-project-1234") | `string` | n/a | yes | 43 | | region | The region to deploy to | `string` | `"us-east1"` | no | 44 | | service\_account\_email | The Service Account email used to create the job. | `string` | n/a | yes | 45 | | zone | The zone to deploy to | `string` | `"us-central1-a"` | no | 46 | 47 | ## Outputs 48 | 49 | | Name | Description | 50 | |------|-------------| 51 | | bucket\_name | The name of the bucket | 52 | | df\_job\_id | The unique Id of the newly created Dataflow job | 53 | | df\_job\_id\_2 | The unique Id of the newly created Dataflow job | 54 | | df\_job\_name | The name of the newly created Dataflow job | 55 | | df\_job\_name\_2 | The name of the newly created Dataflow job | 56 | | df\_job\_state | The state of the newly created Dataflow job | 57 | | df\_job\_state\_2 | The state of the newly created Dataflow job | 58 | | project\_id | The project id used when managing resources. | 59 | | region | The region used when managing resources. 
| 60 | 61 | 62 | -------------------------------------------------------------------------------- /test/fixtures/simple_example/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | module "example" { 18 | source = "../../../examples/simple_example" 19 | project_id = var.project_id 20 | region = var.region 21 | service_account_email = var.service_account_email 22 | force_destroy = true 23 | } 24 | 25 | -------------------------------------------------------------------------------- /test/fixtures/simple_example/outputs.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | output "project_id" { 18 | description = "The project id used when managing resources." 19 | value = var.project_id 20 | } 21 | 22 | output "region" { 23 | description = "The region used when managing resources." 
24 | value = var.region 25 | } 26 | 27 | output "df_job_name" { 28 | description = "The name of the newly created Dataflow job" 29 | value = module.example.df_job_name 30 | } 31 | 32 | output "df_job_state" { 33 | description = "The state of the newly created Dataflow job" 34 | value = module.example.df_job_state 35 | } 36 | 37 | output "df_job_id" { 38 | description = "The unique Id of the newly created Dataflow job" 39 | value = module.example.df_job_id 40 | } 41 | 42 | output "df_job_name_2" { 43 | description = "The name of the newly created Dataflow job" 44 | value = module.example.df_job_name_2 45 | } 46 | 47 | output "df_job_state_2" { 48 | description = "The state of the newly created Dataflow job" 49 | value = module.example.df_job_state_2 50 | } 51 | 52 | output "df_job_id_2" { 53 | description = "The unique Id of the newly created Dataflow job" 54 | value = module.example.df_job_id_2 55 | } 56 | 57 | output "bucket_name" { 58 | description = "The name of the bucket" 59 | value = module.example.bucket_name 60 | } 61 | 62 | -------------------------------------------------------------------------------- /test/fixtures/simple_example/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | variable "project_id" { 18 | type = string 19 | description = "The project_id to deploy the example instance into. (e.g. \"simple-sample-project-1234\")" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "The region to deploy to" 25 | default = "us-east1" 26 | } 27 | 28 | variable "zone" { 29 | type = string 30 | description = "The zone to deploy to" 31 | default = "us-central1-a" 32 | } 33 | 34 | variable "service_account_email" { 35 | type = string 36 | description = "The Service Account email used to create the job." 37 | } 38 | 39 | -------------------------------------------------------------------------------- /test/fixtures/simple_example/versions.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | terraform { 18 | required_version = ">= 0.12" 19 | } 20 | -------------------------------------------------------------------------------- /test/integration/simple_example/controls/gcloud.rb: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | project_id = attribute('project_id') 16 | region = attribute('region') 17 | df_job_state = attribute('df_job_state') 18 | df_job_id = attribute('df_job_id') 19 | df_job_state_2 = attribute('df_job_state_2') 20 | df_job_id_2 = attribute('df_job_id_2') 21 | bucket_name = attribute('bucket_name') 22 | 23 | control "gcloud" do 24 | title "jobs ids match" 25 | describe command("gcloud --project=#{project_id} dataflow jobs list --format=json --region=#{region}") do 26 | its(:exit_status) { should eq 0 } 27 | its(:stderr) { should eq '' } 28 | 29 | 30 | let(:data) do 31 | if subject.exit_status == 0 32 | JSON.parse(subject.stdout) 33 | else 34 | {} 35 | end 36 | end 37 | 38 | context "gcloud dataflow jobs list OUTPUT" do 39 | describe "contains all successful and failed jobs for the target project" do 40 | it "should include the newly created dataflow jobs' job_ids" do 41 | expect(data).to include( 42 | including( 43 | "id" => "#{df_job_id}" 44 | ), 45 | including( 46 | "id" => "#{df_job_id_2}" 47 | ) 48 | ) 49 | end 50 | end 51 | end 52 | 53 | end 54 | end 55 | 56 | 57 | control "gcloud_dataflow" do 58 | title "jobs state" 59 | describe command("gcloud --project=#{project_id} dataflow jobs list --format=json --region=#{region}") do 60 | its(:exit_status) { should eq 0 } 61 | its(:stderr) { should eq '' } 62 | 63 | 64 | context "google_dataflow_job 1 state attribute" do 65 | it "should be a stable state (e.g JOB_STATE_RUNNING or JOB_STATE_PENDING)" do 66 | expect(df_job_state).to match(/(JOB_STATE_RUNNING|JOB_STATE_PENDING)/) 67 | end 68 | end 69 | 70 | context "google_dataflow_job 2 state attribute" do 71 | it "should be a stable state (e.g JOB_STATE_RUNNING or JOB_STATE_PENDING)" do 72 | expect(df_job_state_2).to match(/(JOB_STATE_RUNNING|JOB_STATE_PENDING)/) 73 | end 74 | end 75 | 76 | end 77 | end 78 | 79 | control "gsutil" do 80 | title "bucket configuration" 81 | describe command("gsutil -o Credentials:gs_service_key_file=$GOOGLE_APPLICATION_CREDENTIALS lifecycle get gs://#{bucket_name} --project=#{project_id}") do 82 | its(:exit_status) { should eq 0 } 83 | its(:stderr) { should eq '' } 84 | its('stdout') { should match("has no lifecycle configuration.") } 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /test/integration/simple_example/inspec.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: simple_example 16 | depends: 17 | - name: inspec-gcp 18 | git: https://github.com/inspec/inspec-gcp.git 19 | tag: v0.10.0 20 | attributes: 21 | - name: project_id 22 | required: true 23 | type: string 24 | - name: region 25 | required: true 26 | type: string 27 | - name: df_job_name 28 | required: true 29 | type: string 30 | - name: df_job_state 31 | required: true 32 | type: string 33 | - name: df_job_id 34 | required: true 35 | type: string 36 | - name: df_job_name_2 37 | required: true 38 | type: string 39 | - name: df_job_state_2 40 | required: true 41 | type: string 42 | - name: df_job_id_2 43 | required: true 44 | type: string 45 | - name: bucket_name 46 | required: true 47 | type: string 48 | -------------------------------------------------------------------------------- /test/setup/.gitignore: -------------------------------------------------------------------------------- 1 | terraform.tfvars 2 | source.sh 3 | -------------------------------------------------------------------------------- /test/setup/iam.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | locals { 18 | int_required_roles = [ 19 | "roles/dataflow.admin", 20 | "roles/dataflow.worker", 21 | "roles/storage.admin", 22 | "roles/compute.networkAdmin", 23 | "roles/pubsub.editor", 24 | "roles/bigquery.dataEditor", 25 | "roles/artifactregistry.writer", 26 | "roles/iam.serviceAccountUser", 27 | "roles/resourcemanager.projectIamAdmin" 28 | ] 29 | } 30 | 31 | resource "google_service_account" "int_test" { 32 | project = module.project-ci-dataflow.project_id 33 | account_id = "ci-dataflow" 34 | display_name = "ci-dataflow" 35 | } 36 | 37 | resource "google_project_iam_member" "int_test" { 38 | count = length(local.int_required_roles) 39 | 40 | project = module.project-ci-dataflow.project_id 41 | role = local.int_required_roles[count.index] 42 | member = "serviceAccount:${google_service_account.int_test.email}" 43 | } 44 | 45 | resource "google_service_account_key" "int_test" { 46 | service_account_id = google_service_account.int_test.id 47 | } 48 | -------------------------------------------------------------------------------- /test/setup/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | module "project-ci-dataflow" { 18 | source = "terraform-google-modules/project-factory/google" 19 | version = "~> 17.0" 20 | 21 | name = "ci-test-df" 22 | random_project_id = "true" 23 | org_id = var.org_id 24 | folder_id = var.folder_id 25 | billing_account = var.billing_account 26 | default_service_account = "keep" 27 | 28 | activate_apis = [ 29 | "cloudresourcemanager.googleapis.com", 30 | "storage-api.googleapis.com", 31 | "serviceusage.googleapis.com", 32 | "dataflow.googleapis.com" 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /test/setup/outputs.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | output "project_id" { 18 | value = module.project-ci-dataflow.project_id 19 | } 20 | 21 | output "sa_key" { 22 | value = google_service_account_key.int_test.private_key 23 | sensitive = true 24 | } 25 | 26 | output "service_account_email" { 27 | value = google_service_account.int_test.email 28 | } 29 | -------------------------------------------------------------------------------- /test/setup/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | variable "org_id" { 17 | description = "The numeric organization id" 18 | } 19 | 20 | variable "folder_id" { 21 | description = "The folder to deploy in" 22 | } 23 | 24 | variable "billing_account" { 25 | description = "The billing account id associated with the project, e.g. XXXXXX-YYYYYY-ZZZZZZ" 26 | } 27 | -------------------------------------------------------------------------------- /test/setup/versions.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_version = ">= 0.13" 19 | required_providers { 20 | google = { 21 | source = "hashicorp/google" 22 | version = ">= 3.53, < 7" 23 | } 24 | } 25 | } 26 | --------------------------------------------------------------------------------
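As a closing note on the test setup above: the `test/setup` stack outputs `project_id`, `sa_key`, and `service_account_email`, which feed the fixture variables (`project_id`, `region`, `service_account_email`) through the shared tfvars file referenced in the legacy README's Testing section. A hypothetical sketch of that file follows; all values are placeholders, not real outputs.

```hcl
# Hypothetical test/fixtures/shared/terraform.tfvars; each value below is a
# placeholder standing in for the corresponding test/setup output.
project_id            = "ci-test-df-1234"                                     # from output "project_id"
region                = "us-east1"                                            # fixture default
service_account_email = "ci-dataflow@ci-test-df-1234.iam.gserviceaccount.com" # from output "service_account_email"
```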