├── tests ├── __init__.py ├── data │ ├── extra_source.txt │ └── customization_script_with_extra_sources.sh ├── test_customize_conda_script.sh ├── test_create_custom_image.sh ├── test_infer_subminor_version.sh ├── test_shell_script_generator.py └── test_args_parser.py ├── custom_image_utils ├── __init__.py ├── constants.py ├── shell_image_creator.py ├── shell_script_executor.py ├── image_labeller.py ├── expiration_notifier.py ├── smoke_test_runner.py ├── shell_script_generator.py ├── args_parser.py └── args_inferer.py ├── cloud_build ├── cloudbuild.yaml └── presubmit.sh ├── startup_script ├── README.md └── run.sh ├── .gitignore ├── Makefile ├── examples ├── patch-log4j.sh └── customization_script.sh ├── CONTRIBUTING.md ├── generate_custom_image.py ├── scripts └── customize_conda.sh ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cloud_build/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/gcloud' 3 | id: 'presubmit' 4 | entrypoint: 'bash' 5 | args: ['cloud_build/presubmit.sh'] 6 | -------------------------------------------------------------------------------- /startup_script/README.md: -------------------------------------------------------------------------------- 1 | [GCE VM startup script](https://cloud.google.com/compute/docs/startupscript) 2 | which downloads and runs the user-provided customization script. 
3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | *.pyc 4 | 5 | # Ignore IntelliJ files. 6 | .idea/ 7 | *.iml 8 | *.ipr 9 | *.iws 10 | 11 | # MacOS folder files 12 | .DS_Store 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | .PHONY: tests 3 | 4 | default: clean unit_tests 5 | 6 | clean: 7 | rm -f custom_image_utils/*.pyc tests/*.pyc 8 | 9 | unit_tests: 10 | python2 -m unittest discover 11 | 12 | integration_tests: 13 | bash tests/test_create_custom_image.sh 14 | -------------------------------------------------------------------------------- /tests/data/extra_source.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | Example extra source file 17 | -------------------------------------------------------------------------------- /cloud_build/presubmit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Google Inc. All Rights Reserved. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Run all tests 18 | python2 -m unittest discover 19 | -------------------------------------------------------------------------------- /tests/data/customization_script_with_extra_sources.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2019 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | cat extra/source.txt 19 | -------------------------------------------------------------------------------- /examples/patch-log4j.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euxo pipefail 4 | 5 | # This script applies patches to log4j jars of version [2.0.0, 2.16.0) 6 | # for CVE-2021-44228 in Dataproc custom images. 
# Patch every vulnerable log4j-core jar found on the filesystem.
# A jar is considered vulnerable when its version is in [2.0.0, 2.16.0),
# i.e. affected by CVE-2021-44228 (Log4Shell).
function main() {
  echo "Searching for log4j jars of version [2.0.0, 2.16.0)..."
  local -a jars
  # The regex matches log4j-core-2.<0..15>[.<patch>].jar anywhere under /.
  # `|| true` keeps the pipeline from failing under `set -e` when `find`
  # hits unreadable paths and exits non-zero.
  mapfile -t jars < <(find / -regextype egrep -regex ".*/log4j-core-2\.([0-9]|1[0-5])(\.[0-9]+)?\.jar$" || true)
  echo "Found ${#jars[@]} jars"
  for jar in "${jars[@]}"; do
    echo "Patching ${jar}"
    # Mitigation recommended upstream: delete the JndiLookup class from the
    # jar so the JNDI lookup path cannot be triggered.
    zip -q -d "${jar}" org/apache/logging/log4j/core/lookup/JndiLookup.class \
      || { echo "Failed patching ${jar}"; exit 1; }
    echo "Done with patching ${jar}"
  done

  echo "All done"
}

main "$@"
27 | """ 28 | -------------------------------------------------------------------------------- /examples/customization_script.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2019 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -euxo pipefail 18 | 19 | METADATA1=$(/usr/share/google/get_metadata_value attributes/key1) 20 | echo "Metadata key1=${METADATA1}" 21 | METADATA2=$(/usr/share/google/get_metadata_value attributes/key2) 22 | echo "Metadata key1=${METADATA2}" 23 | 24 | if [[ ${METADATA1} != "value1" || ${METADATA2} != "value2" ]]; then 25 | echo "Unexpected metadata values" 26 | exit 1 27 | fi 28 | 29 | echo "Installing custom packages..." 30 | apt-get -y update 31 | apt-get install python-dev python-pip -y 32 | pip install numpy 33 | echo "Successfully installed custom packages." 34 | -------------------------------------------------------------------------------- /custom_image_utils/shell_image_creator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 
def create(args):
  """Builds a custom image from a generated Shell script.

  Generates the workflow script from the parsed arguments, logs it for
  inspection, and (unless --dry-run was given) executes it to build the
  custom image.
  """

  _LOG.info("Generating Shell script...")
  generated = shell_script_generator.Generator().generate(vars(args))
  separator = "#" * 60
  # Echo the full script between separator lines so users can audit it.
  for chunk in (separator, generated, separator):
    _LOG.info(chunk)
  _LOG.info("Successfully generated Shell script...")

  if args.dry_run:
    _LOG.info("Skip creating custom image (dry run).")
    return

  _LOG.info("Creating custom image...")
  shell_script_executor.run(generated)
  _LOG.info("Successfully created custom image...")
def run(shell_script):
  """Runs a Shell script and waits for it to complete.

  The script text is written to a temporary file, executed with bash, and
  the temporary file is removed afterwards. The child process inherits this
  process's stdout/stderr, so the script's output streams directly to the
  user.

  Args:
    shell_script: the full text of the Shell script to execute.

  Raises:
    RuntimeError: if the script exits with a non-zero status.
  """

  # Write the script to a temp file. delete=False so the file survives
  # close() and can be executed by name; we remove it ourselves below.
  temp_file = tempfile.NamedTemporaryFile(delete=False)
  try:
    temp_file.write(shell_script.encode("utf-8"))
    temp_file.flush()
    temp_file.close()  # close this file but do not delete

    # Run the shell script from the temp file and wait for it to complete.
    # (Replaces the previous Popen/wait pair plus dead commented-out
    # stdout-reading code with the equivalent subprocess.call.)
    return_code = subprocess.call(
        ['bash', temp_file.name], stdout=sys.stdout, stderr=sys.stderr)
    if return_code != 0:
      raise RuntimeError("Error building custom image.")
  finally:
    # Best-effort cleanup of the temp file.
    try:
      os.remove(temp_file.name)
    except OSError:
      pass
* If you are an individual writing original source code and you're sure you
  own the intellectual property, then you'll need to sign an
  [individual CLA](https://developers.google.com/open-source/cla/individual).
* If you work for a company that wants to allow you to contribute your work,
  then you'll need to sign a
  [corporate CLA](https://developers.google.com/open-source/cla/corporate).
def _set_custom_image_label(image_name, version, project_id):
  """Sets Dataproc version label in the custom image.

  Args:
    image_name: name of the custom image to label.
    version: Dataproc version string, e.g. '1.5.0-RC1-debian9'.
    project_id: the GCP project that owns the image.

  Raises:
    RuntimeError: if the gcloud command exits with a non-zero status.
  """

  # Convert `1.5.0-RC1-debian9` version to `1-5-0-rc1-debian9` label
  # (GCE label values may not contain dots or uppercase letters).
  version_label = version.replace('.', '-').lower()
  label_flag = "--labels=goog-dataproc-version={}".format(version_label)
  command = [
      "gcloud", "compute", "images", "add-labels", image_name, "--project",
      project_id, label_flag
  ]
  _LOG.info("Running: {}".format(" ".join(command)))

  # Run `gcloud compute images add-labels` and wait for it to finish.
  pipe = subprocess.Popen(command)
  pipe.wait()
  if pipe.returncode != 0:
    raise RuntimeError("Cannot set dataproc version to image label.")


def add_label(args):
  """Sets Dataproc version label in the custom image.

  Args:
    args: parsed command-line arguments; reads `image_name`,
        `dataproc_version`, `project_id` and `dry_run`.
  """

  if not args.dry_run:
    _LOG.info("Setting label on custom image...")
    _set_custom_image_label(args.image_name, args.dataproc_version,
                            args.project_id)
    _LOG.info("Successfully set label on custom image...")
  else:
    _LOG.info("Skip setting label on custom image (dry run).")
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2019 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -euxo pipefail 18 | 19 | readonly CURRENT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd) 20 | readonly REPO_DIR=$(realpath "${CURRENT_DIR}/..") 21 | 22 | readonly TEST_SUFFIX=$(tr -dc 'a-z0-9' /dev/null 2>&1 && pwd) 20 | readonly REPO_DIR=$(realpath "${CURRENT_DIR}/..") 21 | 22 | readonly TEST_SUFFIX=$(tr -dc 'a-z0-9' /dev/null 2>&1 && pwd) 20 | readonly REPO_DIR=$(realpath "${CURRENT_DIR}/..") 21 | 22 | readonly TEST_SUFFIX=$(tr -dc 'a-z0-9' /dev/null; then 44 | ready="true" 45 | break 46 | fi 47 | 48 | if ((i == 10)); then 49 | echo "BuildFailed: timed out waiting for gsutil to be available on Ubuntu." 50 | fi 51 | done 52 | else 53 | ready="true" 54 | fi 55 | } 56 | 57 | function download_scripts() { 58 | gsutil -m cp -r "${CUSTOM_SOURCES_PATH}/*" ./ 59 | } 60 | 61 | function run_custom_script() { 62 | if ! download_scripts; then 63 | echo "BuildFailed: failed to download scripts from ${CUSTOM_SOURCES_PATH}." 64 | return 1 65 | fi 66 | 67 | # run init actions 68 | bash -x ./init_actions.sh 69 | 70 | # get return code 71 | RET_CODE=$? 72 | 73 | # print failure message if install fails 74 | if [[ $RET_CODE -ne 0 ]]; then 75 | echo "BuildFailed: Dataproc Initialization Actions Failed. 
Please check your initialization script." 76 | else 77 | echo "BuildSucceeded: Dataproc Initialization Actions Succeeded." 78 | fi 79 | } 80 | 81 | function cleanup() { 82 | # .config and .gsutil dirs are created by the gsutil command. It contains 83 | # transient authentication keys to access gcs bucket. The init_actions.sh and 84 | # run.sh are your customization and bootstrap scripts (this) which must be 85 | # removed after creating the image 86 | rm -rf ~/.config/ ~/.gsutil/ 87 | rm ./init_actions.sh ./run.sh 88 | } 89 | 90 | function main() { 91 | wait_until_ready 92 | 93 | if [[ "${ready}" == "true" ]]; then 94 | run_custom_script 95 | cleanup 96 | fi 97 | 98 | echo "Sleep ${SHUTDOWN_TIMER_IN_SEC}s before shutting down..." 99 | echo "You can change the timeout value with --shutdown-instance-timer-sec" 100 | sleep "${SHUTDOWN_TIMER_IN_SEC}" # wait for stdout to flush 101 | shutdown -h now 102 | } 103 | 104 | main "$@" 105 | -------------------------------------------------------------------------------- /custom_image_utils/smoke_test_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Run smoke test for Dataproc custom images. 
15 | """ 16 | 17 | import datetime 18 | import logging 19 | import subprocess 20 | import uuid 21 | 22 | logging.basicConfig() 23 | _LOG = logging.getLogger(__name__) 24 | _LOG.setLevel(logging.INFO) 25 | 26 | def _create_workflow_template(workflow_name, image_name, project_id, zone, region, 27 | network, subnet, no_external_ip): 28 | """Create a Dataproc workflow template for testing.""" 29 | create_command = [ 30 | "gcloud", "dataproc", "workflow-templates", "create", 31 | workflow_name, "--project", project_id, "--region", region 32 | ] 33 | set_cluster_command = [ 34 | "gcloud", "dataproc", "workflow-templates", 35 | "set-managed-cluster", workflow_name, "--project", project_id, "--image", 36 | image_name, "--zone", zone, "--region", region 37 | ] 38 | if network and not subnet: 39 | set_cluster_command.extend(["--network", network]) 40 | else: 41 | set_cluster_command.extend(["--subnet", subnet]) 42 | if no_external_ip: 43 | set_cluster_command.extend(["--no-address"]) 44 | add_job_command = [ 45 | "gcloud", "dataproc", "workflow-templates", "add-job", "spark", 46 | "--workflow-template", workflow_name, "--project", project_id, "--region", region, 47 | "--step-id", "001", "--class", "org.apache.spark.examples.SparkPi", 48 | "--jars", "file:///usr/lib/spark/examples/jars/spark-examples.jar", "--", 49 | "1000" 50 | ] 51 | pipe = subprocess.Popen(create_command) 52 | pipe.wait() 53 | if pipe.returncode != 0: 54 | raise RuntimeError("Error creating Dataproc workflow template '%s'.", 55 | workflow_name) 56 | 57 | pipe = subprocess.Popen(set_cluster_command) 58 | pipe.wait() 59 | if pipe.returncode != 0: 60 | raise RuntimeError( 61 | "Error setting cluster for Dataproc workflow template '%s'.", 62 | workflow_name) 63 | 64 | pipe = subprocess.Popen(add_job_command) 65 | pipe.wait() 66 | if pipe.returncode != 0: 67 | raise RuntimeError("Error adding job to Dataproc workflow template '%s'.", 68 | workflow_name) 69 | 70 | 71 | def 
_instantiate_workflow_template(workflow_name, project_id, region): 72 | """Run a Dataproc workflow template to test the newly built custom image.""" 73 | command = [ 74 | "gcloud", "dataproc", "workflow-templates", "instantiate", 75 | workflow_name, "--project", project_id, "--region", region 76 | ] 77 | pipe = subprocess.Popen(command) 78 | pipe.wait() 79 | if pipe.returncode != 0: 80 | raise RuntimeError("Unable to instantiate workflow template.") 81 | 82 | 83 | def _delete_workflow_template(workflow_name, project_id, region): 84 | """Delete a Dataproc workflow template.""" 85 | command = [ 86 | "gcloud", "dataproc", "workflow-templates", "delete", 87 | workflow_name, "-q", "--project", project_id, "--region", region 88 | ] 89 | pipe = subprocess.Popen(command) 90 | pipe.wait() 91 | if pipe.returncode != 0: 92 | raise RuntimeError("Error deleting workfloe template %s.", workflow_name) 93 | 94 | 95 | def _verify_custom_image(image_name, project_id, zone, network, subnetwork, no_external_ip): 96 | """Verifies if custom image works with Dataproc.""" 97 | region = zone[:-2] 98 | date = datetime.datetime.now().strftime("%Y%m%d%H%M%S") 99 | # Note: workflow_name can collide if the script runs more than 10000 100 | # times/second. 
101 | workflow_name = "verify-image-{}-{}".format(date, uuid.uuid4().hex[-8:]) 102 | try: 103 | _LOG.info("Creating Dataproc workflow-template %s with image %s...", 104 | workflow_name, image_name) 105 | _create_workflow_template(workflow_name, image_name, project_id, zone, region, 106 | network, subnetwork, no_external_ip) 107 | _LOG.info( 108 | "Successfully created Dataproc workflow-template %s with image %s...", 109 | workflow_name, image_name) 110 | _LOG.info("Smoke testing Dataproc workflow-template %s...") 111 | _instantiate_workflow_template(workflow_name, project_id, region) 112 | _LOG.info("Successfully smoke tested Dataproc workflow-template %s...", 113 | workflow_name) 114 | except RuntimeError as e: 115 | err_msg = "Verification of custom image {} failed: {}".format( 116 | image_name, e) 117 | _LOG.error(err_msg) 118 | raise RuntimeError(err_msg) 119 | finally: 120 | try: 121 | _LOG.info("Deleting Dataproc workflow-template %s...", workflow_name) 122 | _delete_workflow_template(workflow_name, project_id, region) 123 | _LOG.info("Successfully deleted Dataproc workflow-template %s...", 124 | workflow_name) 125 | except RuntimeError: 126 | pass 127 | 128 | 129 | def run(args): 130 | """Runs smoke test.""" 131 | 132 | if not args.dry_run: 133 | if not args.no_smoke_test: 134 | _LOG.info("Verifying the custom image...") 135 | _verify_custom_image(args.image_name, args.project_id, args.zone, 136 | args.network, args.subnetwork, args.no_external_ip) 137 | _LOG.info("Successfully verified the custom image...") 138 | else: 139 | _LOG.info("Skip running smoke test (dry run).") 140 | -------------------------------------------------------------------------------- /tests/test_shell_script_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from custom_image_utils import shell_script_generator 18 | 19 | _expected_script = """ 20 | #!/usr/bin/env bash 21 | 22 | # Script for creating Dataproc custom image. 23 | 24 | set -euxo pipefail 25 | 26 | RED='\\e[0;31m' 27 | GREEN='\\e[0;32m' 28 | NC='\\e[0m' 29 | 30 | function exit_handler() { 31 | echo 'Cleaning up before exiting.' 32 | 33 | if [[ -f /tmp/custom-image-my-image-20190611-160823/vm_created ]]; then 34 | echo 'Deleting VM instance.' 35 | gcloud compute instances delete my-image-install --project=my-project --zone=us-west1-a -q 36 | elif [[ -f /tmp/custom-image-my-image-20190611-160823/disk_created ]]; then 37 | echo 'Deleting disk.' 38 | gcloud compute disks delete my-image-install --project=my-project --zone=us-west1-a -q 39 | fi 40 | 41 | echo 'Uploading local logs to GCS bucket.' 
42 | gsutil -m rsync -r /tmp/custom-image-my-image-20190611-160823/logs/ gs://my-bucket/custom-image-my-image-20190611-160823/logs/ 43 | 44 | if [[ -f /tmp/custom-image-my-image-20190611-160823/image_created ]]; then 45 | echo -e "${GREEN}Workflow succeeded, check logs at /tmp/custom-image-my-image-20190611-160823/logs/ or gs://my-bucket/custom-image-my-image-20190611-160823/logs/${NC}" 46 | exit 0 47 | else 48 | echo -e "${RED}Workflow failed, check logs at /tmp/custom-image-my-image-20190611-160823/logs/ or gs://my-bucket/custom-image-my-image-20190611-160823/logs/${NC}" 49 | exit 1 50 | fi 51 | } 52 | 53 | function main() { 54 | echo 'Uploading files to GCS bucket.' 55 | declare -a sources_k=([0]='run.sh' [1]='init_actions.sh' [2]='ext'\\''ra_src.txt') 56 | declare -a sources_v=([0]='startup_script/run.sh' [1]='/tmp/my-script.sh' [2]='/path/to/extra.txt') 57 | for i in "${!sources_k[@]}"; do 58 | gsutil cp "${sources_v[i]}" "gs://my-bucket/custom-image-my-image-20190611-160823/sources/${sources_k[i]}" 59 | done 60 | 61 | echo 'Creating disk.' 62 | if [[ 'projects/my-dataproc-project/global/images/family/debian-10' = '' || 'projects/my-dataproc-project/global/images/family/debian-10' = 'None' ]]; then 63 | IMAGE_SOURCE="--image=projects/cloud-dataproc/global/images/dataproc-1-4-deb9-20190510-000000-rc01" 64 | else 65 | IMAGE_SOURCE="--image-family=projects/my-dataproc-project/global/images/family/debian-10" 66 | fi 67 | 68 | gcloud compute disks create my-image-install --project=my-project --zone=us-west1-a ${IMAGE_SOURCE} --type=pd-ssd --size=40GB 69 | 70 | touch "/tmp/custom-image-my-image-20190611-160823/disk_created" 71 | 72 | echo 'Creating VM instance to run customization script.' 
73 | gcloud compute instances create my-image-install --project=my-project --zone=us-west1-a --subnet=my-subnet --no-address --machine-type=n1-standard-2 --disk=auto-delete=yes,boot=yes,mode=rw,name=my-image-install --accelerator=type=nvidia-tesla-v100,count=2 --maintenance-policy terminate --service-account=my-service-account --scopes=cloud-platform --metadata=shutdown-timer-in-sec=500,custom-sources-path=gs://my-bucket/custom-image-my-image-20190611-160823/sources,key1=value1,key2=value2 --metadata-from-file startup-script=startup_script/run.sh 74 | touch /tmp/custom-image-my-image-20190611-160823/vm_created 75 | 76 | echo 'Waiting for customization script to finish and VM shutdown.' 77 | gcloud compute instances tail-serial-port-output my-image-install --project=my-project --zone=us-west1-a --port=1 2>&1 | grep 'startup-script' | tee /tmp/custom-image-my-image-20190611-160823/logs/startup-script.log || true 78 | 79 | echo 'Checking customization script result.' 80 | if grep 'BuildFailed:' /tmp/custom-image-my-image-20190611-160823/logs/startup-script.log; then 81 | echo -e "${RED}Customization script failed.${NC}" 82 | exit 1 83 | elif grep 'BuildSucceeded:' /tmp/custom-image-my-image-20190611-160823/logs/startup-script.log; then 84 | echo -e "${GREEN}Customization script succeeded.${NC}" 85 | else 86 | echo 'Unable to determine the customization script result.' 87 | exit 1 88 | fi 89 | 90 | echo 'Creating custom image.' 
class TestShellScriptGenerator(unittest.TestCase):
  """Golden-file style test for shell_script_generator.Generator."""

  def test_generate_shell_script(self):
    """Generates a workflow script from a full argument set and compares it
    verbatim against the golden `_expected_script` defined above."""
    # Representative arguments exercising optional features: extra sources
    # (including a single quote in the file name, to cover shell quoting),
    # accelerators, subnetwork with no external IP, a storage location, and
    # a base image family overriding the base image.
    args = {
        'run_id': 'custom-image-my-image-20190611-160823',
        'family': 'debian9',
        'image_name': 'my-image',
        'customization_script': '/tmp/my-script.sh',
        'metadata': 'key1=value1,key2=value2',
        'extra_sources': {"ext'ra_src.txt": "/path/to/extra.txt"},
        'machine_type': 'n1-standard-2',
        'disk_size': 40,
        'accelerator': 'type=nvidia-tesla-v100,count=2',
        'gcs_bucket': 'gs://my-bucket',
        'network': 'my-network',
        'subnetwork': 'my-subnet',
        'no_external_ip': True,
        'zone': 'us-west1-a',
        'dataproc_base_image':
            'projects/cloud-dataproc/global/images/dataproc-1-4-deb9-20190510-000000-rc01',
        'service_account': 'my-service-account',
        'oauth': '',
        'project_id': 'my-project',
        'storage_location': 'us-east1',
        'shutdown_timer_in_sec': 500,
        'base_image_family': 'projects/my-dataproc-project/global/images/family/debian-10'
    }

    script = shell_script_generator.Generator().generate(args)

    self.assertEqual(script, _expected_script)


if __name__ == '__main__':
  unittest.main()
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Shell script based image creation workflow generator. 16 | """ 17 | 18 | from datetime import datetime 19 | 20 | 21 | _template = """ 22 | #!/usr/bin/env bash 23 | 24 | # Script for creating Dataproc custom image. 25 | 26 | set -euxo pipefail 27 | 28 | RED='\\e[0;31m' 29 | GREEN='\\e[0;32m' 30 | NC='\\e[0m' 31 | 32 | function exit_handler() {{ 33 | echo 'Cleaning up before exiting.' 34 | 35 | if [[ -f /tmp/{run_id}/vm_created ]]; then 36 | echo 'Deleting VM instance.' 37 | gcloud compute instances delete {image_name}-install \ 38 | --project={project_id} --zone={zone} -q 39 | elif [[ -f /tmp/{run_id}/disk_created ]]; then 40 | echo 'Deleting disk.' 41 | gcloud compute disks delete {image_name}-install --project={project_id} --zone={zone} -q 42 | fi 43 | 44 | echo 'Uploading local logs to GCS bucket.' 45 | gsutil -m rsync -r {log_dir}/ {gcs_log_dir}/ 46 | 47 | if [[ -f /tmp/{run_id}/image_created ]]; then 48 | echo -e "${{GREEN}}Workflow succeeded, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" 49 | exit 0 50 | else 51 | echo -e "${{RED}}Workflow failed, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" 52 | exit 1 53 | fi 54 | }} 55 | 56 | function main() {{ 57 | echo 'Uploading files to GCS bucket.' 
58 | declare -a sources_k=({sources_map_k}) 59 | declare -a sources_v=({sources_map_v}) 60 | for i in "${{!sources_k[@]}}"; do 61 | gsutil cp "${{sources_v[i]}}" "{custom_sources_path}/${{sources_k[i]}}" 62 | done 63 | 64 | echo 'Creating disk.' 65 | if [[ '{base_image_family}' = '' || '{base_image_family}' = 'None' ]]; then 66 | IMAGE_SOURCE="--image={dataproc_base_image}" 67 | else 68 | IMAGE_SOURCE="--image-family={base_image_family}" 69 | fi 70 | 71 | gcloud compute disks create {image_name}-install \ 72 | --project={project_id} \ 73 | --zone={zone} \ 74 | ${{IMAGE_SOURCE}} \ 75 | --type=pd-ssd \ 76 | --size={disk_size}GB 77 | 78 | touch "/tmp/{run_id}/disk_created" 79 | 80 | echo 'Creating VM instance to run customization script.' 81 | gcloud compute instances create {image_name}-install \ 82 | --project={project_id} \ 83 | --zone={zone} \ 84 | {network_flag} \ 85 | {subnetwork_flag} \ 86 | {no_external_ip_flag} \ 87 | --machine-type={machine_type} \ 88 | --disk=auto-delete=yes,boot=yes,mode=rw,name={image_name}-install \ 89 | {accelerator_flag} \ 90 | {service_account_flag} \ 91 | --scopes=cloud-platform \ 92 | {metadata_flag} \ 93 | --metadata-from-file startup-script=startup_script/run.sh 94 | touch /tmp/{run_id}/vm_created 95 | 96 | echo 'Waiting for customization script to finish and VM shutdown.' 97 | gcloud compute instances tail-serial-port-output {image_name}-install \ 98 | --project={project_id} \ 99 | --zone={zone} \ 100 | --port=1 2>&1 \ 101 | | grep 'startup-script' \ 102 | | tee {log_dir}/startup-script.log \ 103 | || true 104 | 105 | echo 'Checking customization script result.' 106 | if grep 'BuildFailed:' {log_dir}/startup-script.log; then 107 | echo -e "${{RED}}Customization script failed.${{NC}}" 108 | exit 1 109 | elif grep 'BuildSucceeded:' {log_dir}/startup-script.log; then 110 | echo -e "${{GREEN}}Customization script succeeded.${{NC}}" 111 | else 112 | echo 'Unable to determine the customization script result.' 
113 | exit 1 114 | fi 115 | 116 | echo 'Creating custom image.' 117 | gcloud compute images create {image_name} \ 118 | --project={project_id} \ 119 | --source-disk-zone={zone} \ 120 | --source-disk={image_name}-install \ 121 | {storage_location_flag} \ 122 | --family={family} 123 | touch /tmp/{run_id}/image_created 124 | }} 125 | 126 | trap exit_handler EXIT 127 | mkdir -p {log_dir} 128 | main "$@" 2>&1 | tee {log_dir}/workflow.log 129 | """ 130 | 131 | class Generator: 132 | """Shell script based image creation workflow generator.""" 133 | 134 | def _init_args(self, args): 135 | self.args = args 136 | if "run_id" not in self.args: 137 | self.args["run_id"] = "custom-image-{image_name}-{timestamp}".format( 138 | timestamp=datetime.now().strftime("%Y%m%d-%H%M%S"), **self.args) 139 | self.args["bucket_name"] = self.args["gcs_bucket"].replace("gs://", "") 140 | self.args["custom_sources_path"] = "gs://{bucket_name}/{run_id}/sources".format(**self.args) 141 | 142 | all_sources = { 143 | "run.sh": "startup_script/run.sh", 144 | "init_actions.sh": self.args["customization_script"] 145 | } 146 | all_sources.update(self.args["extra_sources"]) 147 | 148 | sources_map_items = tuple(enumerate(all_sources.items())) 149 | self.args["sources_map_k"] = " ".join([ 150 | "[{}]='{}'".format(i, kv[0].replace("'", "'\\''")) for i, kv in sources_map_items]) 151 | self.args["sources_map_v"] = " ".join([ 152 | "[{}]='{}'".format(i, kv[1].replace("'", "'\\''")) for i, kv in sources_map_items]) 153 | 154 | self.args["log_dir"] = "/tmp/{run_id}/logs".format(**self.args) 155 | self.args["gcs_log_dir"] = "gs://{bucket_name}/{run_id}/logs".format( 156 | **self.args) 157 | if self.args["subnetwork"]: 158 | self.args["subnetwork_flag"] = "--subnet={subnetwork}".format(**self.args) 159 | self.args["network_flag"] = "" 160 | elif self.args["network"]: 161 | self.args["network_flag"] = "--network={network}".format(**self.args) 162 | self.args["subnetwork_flag"] = "" 163 | if 
self.args["service_account"]: 164 | self.args[ 165 | "service_account_flag"] = "--service-account={service_account}".format( 166 | **self.args) 167 | self.args["no_external_ip_flag"] = "--no-address" if self.args[ 168 | "no_external_ip"] else "" 169 | self.args[ 170 | "accelerator_flag"] = "--accelerator={accelerator} --maintenance-policy terminate".format( 171 | **self.args) if self.args["accelerator"] else "" 172 | self.args[ 173 | "storage_location_flag"] = "--storage-location={storage_location}".format( 174 | **self.args) if self.args["storage_location"] else "" 175 | metadata_flag_template = ( 176 | "--metadata=shutdown-timer-in-sec={shutdown_timer_in_sec}," 177 | "custom-sources-path={custom_sources_path}") 178 | if self.args["metadata"]: 179 | metadata_flag_template += ",{metadata}" 180 | self.args["metadata_flag"] = metadata_flag_template.format(**self.args) 181 | 182 | def generate(self, args): 183 | self._init_args(args) 184 | return _template.format(**args) 185 | -------------------------------------------------------------------------------- /scripts/customize_conda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2019 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -euxo pipefail 18 | 19 | # This customization-script can be used to customize the conda environment. 
# conda-packages: (Optional) A list of conda packages with versions to be 32 | #   installed in the base environment. Must be of the format 33 | #   <package1>:<version1>#<package2>:<version2>#... 34 | # 35 | # pip-packages: (Optional) A list of pip packages with versions to be 36 | #   installed in the base environment. Must be of the format 37 | #   <package1>:<version1>#<package2>:<version2>#...
# python generate_custom_image.py \ 64 | #   --image-name <new-custom-image-name> \ 65 | #   --dataproc-version "1.5.34-debian10" \ 66 | #   --customization-script scripts/customize_conda.sh \ 67 | #   --zone <zone> \ 68 | #   --gcs-bucket gs://<gcs-bucket> \ 69 | #   --metadata 'conda-component=MINICONDA3,conda-packages=pytorch:1.4.0#visions:0.7.1,pip-packages=tokenizers:0.10.1#numpy:1.19.2'
87 | exit 1 88 | fi 89 | 90 | if [[ "${conda_component}" == 'ANACONDA' ]]; then 91 | conda_bin_dir="/opt/conda/anaconda/bin" 92 | elif [[ "${conda_component}" == 'MINICONDA3' ]]; then 93 | conda_bin_dir="/opt/conda/miniconda3/bin" 94 | fi 95 | if [[ -n "${conda_env_config_uri}" ]]; then 96 | customize_with_config_file "${conda_bin_dir}" "${conda_env_config_uri}" 97 | else 98 | customize_with_package_list "${conda_bin_dir}" "${conda_packages}" "${pip_packages}" 99 | fi 100 | } 101 | 102 | function validate_conda_component() { 103 | local -r conda_component=$1 104 | 105 | if [[ -z "${conda_component}" ]]; then 106 | echo "Expected metadata conda-component not found" 107 | exit 1 108 | fi 109 | 110 | if [[ "${conda_component}" != 'ANACONDA' && "${conda_component}" != 'MINICONDA3' ]]; then 111 | echo "Metadata conda-component should either be ANACONDA or MINICONDA3" 112 | exit 1 113 | fi 114 | } 115 | 116 | function customize_with_config_file() { 117 | local -r conda_bin_dir=$1 118 | local -r conda_env_config_uri=$2 119 | local temp_config_file 120 | temp_config_file=$(mktemp /tmp/conda_env_XXX.yaml) 121 | gsutil cp "${conda_env_config_uri}" "${temp_config_file}" 122 | conda_env_name="$(grep 'name: ' "${temp_config_file}" | awk '{print $2}')" 123 | if [[ -z "${conda_env_name}" ]]; then 124 | conda_env_name="custom" 125 | fi 126 | create_and_activate_environment "${conda_bin_dir}" "${conda_env_name}" "${temp_config_file}" 127 | } 128 | 129 | function create_and_activate_environment() { 130 | local -r conda_bin_dir=$1 131 | local -r conda_env_name=$2 132 | local -r conda_env_config=$3 133 | "${conda_bin_dir}/conda" env create --quiet --name="${conda_env_name}" --file="${conda_env_config}" 134 | source "${conda_bin_dir}/activate" "${conda_env_name}" 135 | 136 | # Set property conda.env, which can be used during activate of the conda 137 | # component to activate the right environment. 
138 | local -r conda_properties_path=/etc/google-dataproc/conda.properties 139 | echo "conda.env=$conda_env_name" >> "${conda_properties_path}" 140 | } 141 | 142 | function customize_with_package_list() { 143 | local -r conda_bin_dir=$1 144 | local conda_packages=$2 145 | local pip_packages=$3 146 | if [[ -n "${conda_packages}" ]]; then 147 | local -a packages 148 | conda_packages=$(echo "${conda_packages}" | sed -r 's/:/==/g') 149 | IFS='#' read -r -a packages <<< "${conda_packages}" 150 | validate_package_formats "${packages[@]}" 151 | 152 | # Conda will upgrade dependencies only if required, and fail if conflict 153 | # resolution with existing packages is not possible. 154 | "${conda_bin_dir}/conda" install "${packages[@]}" --yes 155 | fi 156 | if [[ -n "${pip_packages}" ]]; then 157 | local -a packages 158 | pip_packages=$(echo "${pip_packages}" | sed -r 's/:/==/g') 159 | IFS='#' read -r -a packages <<< "${pip_packages}" 160 | validate_package_formats "${packages[@]}" 161 | 162 | # Pip will upgrade dependencies only if required. Pip does not check for 163 | # conflicts and may result in inconsistent environment. 164 | "${conda_bin_dir}/pip" install -U --upgrade-strategy only-if-needed "${packages[@]}" 165 | fi 166 | } 167 | 168 | function validate_package_formats() { 169 | local -r packages=("$@") 170 | local -r regex='.+==[0-9]+[\\.[0-9]+]*' 171 | for package in "${packages[@]}"; do 172 | if ! [[ "${package}" =~ $regex ]]; then 173 | echo "Invalid package format ${package}" 174 | exit 1 175 | fi 176 | done 177 | } 178 | 179 | customize_conda 180 | -------------------------------------------------------------------------------- /custom_image_utils/args_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 
help="""The full image URI for the base Dataproc image. The 70 | customization script will be executed on top of this image instead of 71 | an out-of-the-box Dataproc image. This image must be a valid Dataproc 72 | image. 73 | """) 74 |   image_args.add_argument( 75 |       "--base-image-family", 76 |       type=_full_image_family_uri_regex_type, 77 |       help="""The source image family URI. The latest non-deprecated image associated with the family will be used. 78 | """)
return s 51 | 52 | def parse_args(args): 53 | """Parses command-line arguments.""" 54 | parser = argparse.ArgumentParser() 55 | required_args = parser.add_argument_group("required named arguments") 56 | required_args.add_argument( 57 | "--image-name", 58 | type=str, 59 | required=True, 60 | help="""The image name for the Dataproc custom image.""") 61 | image_args = required_args.add_mutually_exclusive_group() 62 | image_args.add_argument( 63 | "--dataproc-version", 64 | type=_version_regex_type, 65 | help=constants.version_help_text) 66 | image_args.add_argument( 67 | "--base-image-uri", 68 | type=_full_image_uri_regex_type, 69 | help="""The full image URI for the base Dataproc image. The 70 | customiziation script will be executed on top of this image instead of 71 | an out-of-the-box Dataproc image. This image must be a valid Dataproc 72 | image. 73 | """) 74 | image_args.add_argument( 75 | "--base-image-family", 76 | type=_full_image_family_uri_regex_type, 77 | help="""The source image family URI. The latest non-depracated image associated with the family will be used. 78 | """) 79 | required_args.add_argument( 80 | "--customization-script", 81 | type=str, 82 | required=True, 83 | help="""User's script to install custom packages.""") 84 | required_args.add_argument( 85 | "--metadata", 86 | type=str, 87 | required=False, 88 | help="""VM metadata which can be read by the customization script 89 | with `/usr/share/google/get_metadata_value attributes/` at runtime. 90 | The value of this flag takes the form of `key1=value1,key2=value2,...`. 91 | If the value includes special characters (e.g., `=`, `,` or spaces) which 92 | needs to be escaped, consider encoding the value, then decode it back in 93 | the customization script. See more information about VM metadata 94 | on https://cloud.google.com/sdk/gcloud/reference/compute/instances/create. 
95 | """) 96 | required_args.add_argument( 97 | "--zone", 98 | type=str, 99 | required=True, 100 | help="""GCE zone used to build the custom image.""") 101 | required_args.add_argument( 102 | "--gcs-bucket", 103 | type=str, 104 | required=True, 105 | help="""GCS bucket used to store files and logs when 106 | building custom image.""") 107 | parser.add_argument( 108 | "--family", 109 | type=str, 110 | required=False, 111 | default='dataproc-custom-image', 112 | help="""(Optional) The family of the image.""") 113 | parser.add_argument( 114 | "--project-id", 115 | type=str, 116 | required=False, 117 | help="""The project Id of the project where the custom image will be 118 | created and saved. The default value will be set to the project id 119 | specified by `gcloud config get-value project`.""") 120 | parser.add_argument( 121 | "--oauth", 122 | type=str, 123 | required=False, 124 | help="""A local path to JSON credentials for your GCE project. 125 | The default oauth is the application-default credentials from gcloud.""") 126 | parser.add_argument( 127 | "--machine-type", 128 | type=str, 129 | required=False, 130 | default="n1-standard-1", 131 | help="""(Optional) Machine type used to build custom image. 132 | Default machine type is n1-standard-1.""") 133 | parser.add_argument( 134 | "--no-smoke-test", 135 | action="store_true", 136 | help="""(Optional) Disables smoke test to verify if the custom image 137 | can create a functional Dataproc cluster.""") 138 | parser.add_argument( 139 | "--network", 140 | type=str, 141 | required=False, 142 | default="", 143 | help="""(Optional) Network interface used to launch the VM instance that 144 | builds the custom image. Default network is 'global/networks/default' 145 | when no network and subnetwork arguments are provided. 
146 | If the default network does not exist in your project, please specify 147 | a valid network interface.""") 148 | parser.add_argument( 149 | "--subnetwork", 150 | type=str, 151 | required=False, 152 | default="", 153 | help="""(Optional) The subnetwork that is used to launch the VM instance 154 | that builds the custom image. A full subnetwork URL is required. 155 | Default subnetwork is None. For shared VPC only provide this parameter and 156 | do not use the --network argument.""") 157 | parser.add_argument( 158 | "--no-external-ip", 159 | action="store_true", 160 | help="""(Optional) Disables external IP for the image build VM. The VM 161 | will not be able to access the internet, but if Private Google 162 | Access is enabled for the subnetwork, it can still access Google services 163 | (e.g., GCS) through internal IP of the VPC.""") 164 | parser.add_argument( 165 | "--service-account", 166 | type=str, 167 | required=False, 168 | default="default", 169 | help= 170 | """(Optional) The service account that is used to launch the VM instance 171 | that builds the custom image. If not specified, the default service 172 | account under the GCE project will be used. The scope of this service 173 | account is defaulted to /auth/cloud-platform.""") 174 | parser.add_argument( 175 | "--extra-sources", 176 | type=json.loads, 177 | required=False, 178 | default={}, 179 | help= 180 | """(Optional) Additional files/directories uploaded along with 181 | customization script. This argument is evaluated to a json dictionary. 182 | For example: 183 | '--extra-sources "{\\"notes.txt\\": \\"/path/to/notes.txt\\"}"' 184 | """) 185 | parser.add_argument( 186 | "--disk-size", 187 | type=int, 188 | required=False, 189 | default=20, 190 | help= 191 | """(Optional) The size in GB of the disk attached to the VM instance 192 | that builds the custom image. 
If not specified, the default value of 193 | 20 GB will be used.""")
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | import exceptions 17 | from custom_image_utils import args_parser 18 | 19 | 20 | class TestArgsParser(unittest.TestCase): 21 | 22 | def test_missing_required_args(self): 23 | """Verifies it fails if missing required args.""" 24 | with self.assertRaises(SystemExit) as e: 25 | args_parser.parse_args([]) 26 | 27 | def test_minimal_required_args(self): 28 | """Verifies it succeeds if all required args are present.""" 29 | customization_script = '/tmp/my-script.sh' 30 | gcs_bucket = 'gs://my-bucket' 31 | image_name = 'my-image' 32 | zone = 'us-west1-a' 33 | 34 | args = args_parser.parse_args([ 35 | '--image-name', image_name, 36 | '--customization-script', customization_script, 37 | '--zone', zone, 38 | '--gcs-bucket', gcs_bucket]) 39 | 40 | expected_result = self._make_expected_result( 41 | accelerator=None, 42 | base_image_family="None", 43 | base_image_uri="None", 44 | customization_script="'{}'".format(customization_script), 45 | dataproc_version="None", 46 | disk_size="20", 47 | dry_run=False, 48 | extra_sources="{}", 49 | family="'dataproc-custom-image'", 50 | gcs_bucket="'{}'".format(gcs_bucket), 51 | image_name="'{}'".format(image_name), 52 | machine_type="'n1-standard-1'", 53 | network="'{}'".format(''), 54 | no_external_ip="False", 55 | no_smoke_test="False", 56 | oauth="None", 57 | project_id="None", 58 | service_account="'default'", 59 | shutdown_instance_timer_sec="300", 60 | storage_location=None, 61 | subnetwork="''", 62 | zone="'{}'".format(zone), 63 | metadata=None 64 | ) 65 | self.assertEqual(str(args), expected_result) 66 | 67 | def test_optional_args(self): 68 | """Verifies it succeeds with optional arguments specified.""" 69 | accelerator = 'type=nvidia-tesla-v100,count=2' 70 | customization_script = '/tmp/my-script.sh' 71 | dataproc_version = '1.4.5-debian9' 72 | disk_size = 40 73 | dry_run = True 74 | 
family = 'debian9' 75 | gcs_bucket = 'gs://my-bucket' 76 | image_name = 'my-image' 77 | machine_type = 'n1-standard-4' 78 | network = 'my-network' 79 | no_external_ip = True 80 | no_smoke_test = True 81 | oauth = 'xyz' 82 | project_id = 'my-project' 83 | service_account = "my-service-account" 84 | shutdown_instance_timer_sec = 567 85 | storage_location = 'us-east1' 86 | subnetwork = 'my-subnetwork' 87 | zone = 'us-west1-a' 88 | metadata = 'key1=value1,key2=value2' 89 | 90 | args = args_parser.parse_args([ 91 | '--accelerator', str(accelerator), 92 | '--customization-script', customization_script, 93 | '--dataproc-version', dataproc_version, 94 | '--disk-size', str(disk_size), 95 | '--dry-run', 96 | '--family', family, 97 | '--gcs-bucket', gcs_bucket, 98 | '--image-name', image_name, 99 | '--machine-type', machine_type, 100 | '--network', network, 101 | '--no-external-ip', 102 | '--no-smoke-test', 103 | '--oauth', oauth, 104 | '--project-id', project_id, 105 | '--service-account', service_account, 106 | '--shutdown-instance-timer-sec', str(shutdown_instance_timer_sec), 107 | '--storage-location', str(storage_location), 108 | '--subnetwork', subnetwork, 109 | '--zone', zone, 110 | '--metadata', metadata, 111 | ]) 112 | 113 | expected_result = self._make_expected_result( 114 | accelerator="'{}'".format(accelerator), 115 | base_image_family="None", 116 | base_image_uri="None", 117 | customization_script="'{}'".format(customization_script), 118 | dataproc_version="'{}'".format(dataproc_version), 119 | disk_size="{}".format(disk_size), 120 | dry_run="{}".format(dry_run), 121 | extra_sources="{}", 122 | family="'{}'".format(family), 123 | gcs_bucket="'{}'".format(gcs_bucket), 124 | image_name="'{}'".format(image_name), 125 | machine_type="'{}'".format(machine_type), 126 | metadata="'{}'".format(metadata), 127 | network="'{}'".format(network), 128 | no_external_ip="{}".format(no_external_ip), 129 | no_smoke_test="{}".format(no_smoke_test), 130 | oauth="'{}'".format(oauth), 
131 | project_id="'{}'".format(project_id), 132 | service_account="'{}'".format(service_account), 133 | shutdown_instance_timer_sec="{}".format(shutdown_instance_timer_sec), 134 | storage_location="'{}'".format(storage_location), 135 | subnetwork="'{}'".format(subnetwork), 136 | zone="'{}'".format(zone), 137 | ) 138 | self.assertEqual(str(args), expected_result) 139 | 140 | def test_inferred_subminor_versions(self): 141 | """Verifies it succeeds if inferred/unspecified subminor version is correctly formatted.""" 142 | customization_script = '/tmp/my-script.sh' 143 | gcs_bucket = 'gs://my-bucket' 144 | image_name = 'my-image' 145 | zone = 'us-west1-a' 146 | 147 | def _args_parsed(dataproc_version): 148 | return args_parser.parse_args([ 149 | '--image-name', image_name, 150 | '--dataproc-version', dataproc_version, 151 | '--customization-script', customization_script, 152 | '--zone', zone, 153 | '--gcs-bucket', gcs_bucket]) 154 | 155 | def _expected_result(dataproc_version): 156 | return self._make_expected_result( 157 | accelerator=None, 158 | base_image_family="None", 159 | base_image_uri="None", 160 | customization_script="'{}'".format(customization_script), 161 | dataproc_version="'{}'".format(dataproc_version), 162 | disk_size="20", 163 | dry_run=False, 164 | extra_sources="{}", 165 | family="'dataproc-custom-image'", 166 | gcs_bucket="'{}'".format(gcs_bucket), 167 | image_name="'{}'".format(image_name), 168 | machine_type="'n1-standard-1'", 169 | network="'{}'".format(''), 170 | no_external_ip="False", 171 | no_smoke_test="False", 172 | oauth="None", 173 | project_id="None", 174 | service_account="'default'", 175 | shutdown_instance_timer_sec="300", 176 | storage_location=None, 177 | subnetwork="''", 178 | zone="'{}'".format(zone), 179 | metadata=None 180 | ) 181 | 182 | def _args_exception(dataproc_version): 183 | # Checks that inputs produce an exception 184 | try: 185 | _args_parsed(dataproc_version) 186 | except SystemExit as e: 187 | 
self.assertEqual(e.__class__, exceptions.SystemExit) 188 | else: 189 | raise ValueError("Exception not raised") 190 | 191 | self.assertEqual(str(_args_parsed('1.5-debian10')), _expected_result('1.5-debian10')) 192 | self.assertEqual(str(_args_parsed('1.3-ubuntu18')), _expected_result('1.3-ubuntu18')) 193 | self.assertEqual(str(_args_parsed('1.3-centos8')), _expected_result('1.3-centos8')) 194 | 195 | invalid_dataproc_versions = ['*.*.*-debian10', '1.**.*-debian10', '1.*.8*-debian10', '11.*.*-debian', 196 | '1.*-debian10', '1.5.*-debian10', '1.5.-debian10', '1.*.*-debian10'] 197 | try: 198 | for version in invalid_dataproc_versions: 199 | _args_exception(version) 200 | except ValueError as e: 201 | raise e 202 | 203 | def _make_expected_result( 204 | self, 205 | accelerator, 206 | base_image_family, 207 | base_image_uri, 208 | customization_script, 209 | dataproc_version, 210 | disk_size, 211 | dry_run, 212 | extra_sources, 213 | family, 214 | gcs_bucket, 215 | image_name, 216 | machine_type, 217 | metadata, 218 | network, 219 | no_external_ip, 220 | no_smoke_test, 221 | oauth, 222 | project_id, 223 | service_account, 224 | shutdown_instance_timer_sec, 225 | storage_location, 226 | subnetwork, 227 | zone): 228 | expected_result_template = ( 229 | "Namespace(" 230 | "accelerator={}, " 231 | "base_image_family={}, " 232 | "base_image_uri={}, " 233 | "customization_script={}, " 234 | "dataproc_version={}, " 235 | "disk_size={}, " 236 | "dry_run={}, " 237 | "extra_sources={}, " 238 | "family={}, " 239 | "gcs_bucket={}, " 240 | "image_name={}, " 241 | "machine_type={}, " 242 | "metadata={}, " 243 | "network={}, " 244 | "no_external_ip={}, " 245 | "no_smoke_test={}, " 246 | "oauth={}, " 247 | "project_id={}, " 248 | "service_account={}, " 249 | "shutdown_instance_timer_sec={}, " 250 | "storage_location={}, " 251 | "subnetwork={}, " 252 | "zone={})") 253 | return expected_result_template.format( 254 | accelerator, 255 | base_image_family, 256 | base_image_uri, 257 | 
customization_script, 258 | dataproc_version, 259 | disk_size, 260 | dry_run, 261 | extra_sources, 262 | family, 263 | gcs_bucket, 264 | image_name, 265 | machine_type, 266 | metadata, 267 | network, 268 | no_external_ip, 269 | no_smoke_test, 270 | oauth, 271 | project_id, 272 | service_account, 273 | shutdown_instance_timer_sec, 274 | storage_location, 275 | subnetwork, 276 | zone) 277 | 278 | if __name__ == '__main__': 279 | unittest.main() 280 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build Dataproc custom images 2 | 3 | This page describes how to generate a custom Dataproc image. 4 | 5 | ## Important notes 6 | 7 | To help ensure that clusters receive the latest service updates and bug fixes, 8 | the creation of clusters with a custom image is limited to **365 days** from the 9 | image creation date, but existing custom-image clusters can run indefinitely. 10 | Automation to continuously build a custom image may be necessary if you wish to 11 | create clusters with a custom image for a period greater than 365 days. 12 | 13 | Creating clusters with expired custom images is possible by following these 14 | [instructions](https://cloud.google.com/dataproc/docs/guides/dataproc-images#how_to_create_a_cluster_with_an_expired_custom_image), 15 | but Cloud Dataproc cannot guarantee support of issues that arise with these 16 | clusters. 17 | 18 | ## Requirements 19 | 20 | 1. Python 2.7+. 21 | 2. gcloud 181.0.0 (2017-11-30). 22 | 3. Bash 3.0. 23 | 4. A GCE project with billing, Google Cloud Dataproc API, Google Compute Engine 24 | API, and Google Cloud Storage APIs enabled. 25 | 5. Use `gcloud config set project ` to specify which project to 26 | use to create and save your custom image. 
27 | 28 | ## Generate custom image 29 | 30 | To generate a custom image, you can run the following command: 31 | 32 | ```shell 33 | python generate_custom_image.py \ 34 | --image-name '' \ 35 | --dataproc-version '' \ 36 | --customization-script '' \ 37 | --zone '' \ 38 | --gcs-bucket '' 39 | ``` 40 | 41 | ### Arguments 42 | 43 | * **--image-name**: The name for custom image. 44 | * **--dataproc-version**: The Dataproc version for this custom image to build 45 | on. Examples: `1.5.9-debian10`, `1.5.0-RC10-debian10`, `1.5.9-ubuntu18`. If 46 | the sub-minor version is unspecified, the latest available one will be used. 47 | Examples: `1.5-centos8`, `2.0-debian10`. For a complete list of Dataproc 48 | image versions, please refer to Dataproc 49 | [release notes](https://cloud.google.com/dataproc/docs/release-notes). To 50 | understand Dataproc versioning, please refer to 51 | [documentation](https://cloud.google.com/dataproc/docs/concepts/versioning/overview). 52 | **This argument is mutually exclusive with `--base-image-uri` and 53 | `--source-image-family`**. 54 | * **--base-image-uri**: The full image URI for the base Dataproc image. The 55 | customization script will be executed on top of this image instead of an 56 | out-of-the-box Dataproc image. This image must be a valid Dataproc image. 57 | **This argument is mutually exclusive with `--dataproc-version` and 58 | `--source-image-family`**. 59 | * **--base-image-family**: The image family that the boot disk will be 60 | initialized with. The latest non-deprecated image from the family will be 61 | used. An example base image family URI is 62 | `projects/PROJECT_NAME/global/images/family/`. To get the list 63 | of image families (and the associated image), run `gcloud compute images 64 | list [--project ]`. **This argument is mutually exclusive with 65 | `--dataproc-version` and `--base-image-uri`**. 66 | * **--customization-script**: The script used to install custom packages on 67 | the image. 
68 | * **--zone**: The GCE zone for running your GCE instance. 69 | * **--gcs-bucket**: A GCS bucket to store the logs of building custom image. 70 | 71 | #### Optional Arguments 72 | 73 | * **--family**: The family of the source image. This will cause the latest 74 | non-deprecated image in the family to be used as the source image. 75 | * **--project-id**: The project Id of the project where the custom image is 76 | created and saved. The default project Id is the current project id 77 | specified in `gcloud config get-value project`. 78 | * **--oauth**: The OAuth credential file used to call Google Cloud APIs. The 79 | default OAuth is the application-default credentials from gcloud. 80 | * **--machine-type**: The machine type used to build custom image. The default 81 | is `n1-standard-1`. 82 | * **--no-smoke-test**: This parameter is used to disable smoke testing the 83 | newly built custom image. The smoke test is used to verify if the newly 84 | built custom image can create a functional Dataproc cluster. Disabling this 85 | step will speed up the custom image build process; however, it is not 86 | advised. Note: The smoke test will create a Dataproc cluster with the newly 87 | built image, runs a short job and deletes the cluster in the end. 88 | * **--network**: This parameter specifies the GCE network to be used to launch 89 | the GCE VM instance which builds the custom Dataproc image. The default 90 | network is 'global/networks/default'. If the default network does not exist 91 | in your project, please specify a valid network interface. For more 92 | information on network interfaces, please refer to 93 | [GCE VPC documentation](https://cloud.google.com/vpc/docs/vpc). 94 | * **--subnetwork**: This parameter specifies the subnetwork that is used to 95 | launch the VM instance that builds the custom Dataprocimage. A full 96 | subnetwork URL is required. The default subnetwork is None. 
For more 97 | information, please refer to 98 | [GCE VPC documentation](https://cloud.google.com/vpc/docs/vpc). 99 | * **--no-external-ip**: This parameter is used to disables external IP for the 100 | image build VM. The VM will not be able to access the internet, but if 101 | [Private Google Access](https://cloud.google.com/vpc/docs/configure-private-google-access) 102 | is enabled for the subnetwork, it can still access Google services (e.g., 103 | GCS) through internal IP of the VPC. 104 | * **--service-account**: The service account that is used to launch the VM 105 | instance that builds the custom Dataproc image. The scope of this service 106 | account is defaulted to "/auth/cloud-platform", which authorizes VM instance 107 | the access to all cloud platform services that is granted by IAM roles. 108 | Note: IAM role must allow the VM instance to access GCS bucket in order to 109 | access scripts and write logs. 110 | * **--extra-sources**: Additional files/directories uploaded along with 111 | customization script. This argument is evaluated to a json dictionary. 112 | * **--disk-size**: The size in GB of the disk attached to the VM instance used 113 | to build custom image. The default is `20` GB. 114 | * **--accelerator**: The accelerators (e.g. GPUs) attached to the VM instance 115 | used to build custom image. This flag supports the same 116 | [values](https://cloud.google.com/sdk/gcloud/reference/compute/instances/create#--accelerator) 117 | as `gcloud compute instances create --accelerator` flag. By default no 118 | accelerators are attached. 119 | * **--base-image-uri**: The partial image URI for the base Dataproc image. The 120 | customization script will be executed on top of this image instead of an 121 | out-of-the-box Dataproc image. This image must be a valid Dataproc image. 122 | The format of the partial image URI is the following: 123 | `projects//global/images/`. 124 | * **--storage-location**: The storage location (e.g. 
US, us-central1) of the 125 | custom GCE image. This flag supports the same 126 | [values](https://cloud.google.com/sdk/gcloud/reference/compute/images/create#--storage-location) 127 | as `gcloud compute images create --storage-location` flag. If not specified, 128 | the default GCE image storage location is used. 129 | * **--shutdown-instance-timer-sec**: The time to wait in seconds before 130 | shutting down the VM instance. This value may need to be increased if your 131 | init script generates a lot of output on stdout. If not specified, the 132 | default value of 300 seconds will be used. 133 | * **--dry-run**: Dry run mode which only validates input and generates 134 | workflow script without creating image. Disabled by default. 135 | * **--metadata**: VM metadata which can be read by the customization script 136 | with `/usr/share/google/get_metadata_value attributes/` at runtime. The 137 | value of this flag takes the form of `key1=value1,key2=value2,...`. If the 138 | value includes special characters (e.g., `=`, `,` or spaces) which needs to 139 | be escaped, consider encoding the value, then decode it back in the 140 | customization script. See more information about VM metadata on 141 | https://cloud.google.com/sdk/gcloud/reference/compute/instances/create. 142 | 143 | #### Overriding cluster properties with a custom image 144 | 145 | You can use custom images to overwrite any 146 | [cluster properties](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties) 147 | set during cluster creation. If a user creates a cluster with your custom image 148 | but sets cluster properties different from those you set with your custom image, 149 | your custom image cluster property settings will take precedence. 
150 | 151 | To set cluster properties with your custom image: 152 | 153 | In your custom image 154 | [customization script](https://cloud.google.com/dataproc/docs/guides/dataproc-images#running_the_code), 155 | create a `dataproc.custom.properties` file in `/etc/google-dataproc`, then set 156 | cluster property values in the file. 157 | 158 | * Sample `dataproc.custom.properties` file contents: 159 | 160 | ```shell 161 | dataproc.conscrypt.provider.enable=true 162 | dataproc.logging.stackdriver.enable=false 163 | ``` 164 | 165 | * Sample customization script file-creation snippet to override two cluster 166 | properties: 167 | 168 | ```shell 169 | cat </etc/google-dataproc/dataproc.custom.properties 170 | dataproc.conscrypt.provider.enable=true 171 | dataproc.logging.stackdriver.enable=false EOF 172 | ``` 173 | 174 | ### Examples 175 | 176 | #### Create a custom image 177 | 178 | Create a custom image with name `custom-image-1-5-9` with Dataproc version 179 | `1.5.9-debian10`: 180 | 181 | ```shell 182 | python generate_custom_image.py \ 183 | --image-name custom-image-1-5-9 \ 184 | --dataproc-version 1.5.9-debian10 \ 185 | --customization-script ~/custom-script.sh \ 186 | --metadata 'key1=value1,key2=value2' \ 187 | --zone us-central1-f \ 188 | --gcs-bucket gs://my-test-bucket 189 | ``` 190 | 191 | #### Create a custom image without running smoke test 192 | 193 | ```shell 194 | python generate_custom_image.py \ 195 | --image-name custom-image-1-5-9 \ 196 | --dataproc-version 1.5.9-debian10 \ 197 | --customization-script ~/custom-script.sh \ 198 | --zone us-central1-f \ 199 | --gcs-bucket gs://my-test-bucket \ 200 | --no-smoke-test 201 | ``` 202 | -------------------------------------------------------------------------------- /custom_image_utils/args_inferer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Infer arguments for Dataproc custom image build. 16 | """ 17 | 18 | import logging 19 | import os 20 | import re 21 | import subprocess 22 | import tempfile 23 | 24 | _IMAGE_PATH = "projects/{}/global/images/{}" 25 | _IMAGE_URI = re.compile( 26 | r"^(https://www\.googleapis\.com/compute/([^/]+)/)?projects/([^/]+)/global/images/([^/]+)$" 27 | ) 28 | _IMAGE_FAMILY_PATH = "projects/{}/global/images/family/{}" 29 | _IMAGE_FAMILY_URI = re.compile( 30 | r"^(https://www\.googleapis\.com/compute/([^/]+)/)?projects/([^/]+)/global/images/family/([^/]+)$" 31 | ) 32 | logging.basicConfig() 33 | _LOG = logging.getLogger(__name__) 34 | _LOG.setLevel(logging.INFO) 35 | 36 | 37 | def _get_project_id(): 38 | """Get project id from gcloud config.""" 39 | gcloud_command = ["gcloud", "config", "get-value", "project"] 40 | with tempfile.NamedTemporaryFile() as temp_file: 41 | pipe = subprocess.Popen(gcloud_command, stdout=temp_file) 42 | pipe.wait() 43 | if pipe.returncode != 0: 44 | raise RuntimeError("Cannot find gcloud project ID. 
" 45 | "Please setup the project ID in gcloud SDK") 46 | # get project id 47 | temp_file.seek(0) 48 | stdout = temp_file.read() 49 | return stdout.decode('utf-8').strip() 50 | 51 | 52 | def _extract_image_name_and_project(image_uri): 53 | """Get Dataproc image name and project.""" 54 | m = _IMAGE_URI.match(image_uri) 55 | return m.group(3), m.group(4) # project, image_name 56 | 57 | 58 | def _extract_image_name_and_project_from_family_uri(image_uri): 59 | """Get Dataproc image family name and project.""" 60 | m = _IMAGE_FAMILY_URI.match(image_uri) 61 | return m.group(3), m.group(4) # project, image_name 62 | 63 | 64 | def _get_dataproc_image_version(image_uri): 65 | """Get Dataproc image version from image URI.""" 66 | project, image_name = _extract_image_name_and_project(image_uri) 67 | command = [ 68 | "gcloud", "compute", "images", "describe", image_name, "--project", 69 | project, "--format=value(labels.goog-dataproc-version)" 70 | ] 71 | 72 | # get stdout from compute images list --filters 73 | with tempfile.NamedTemporaryFile() as temp_file: 74 | pipe = subprocess.Popen(command, stdout=temp_file) 75 | pipe.wait() 76 | if pipe.returncode != 0: 77 | raise RuntimeError( 78 | "Cannot find dataproc base image, please check and verify " 79 | "the base image URI.") 80 | 81 | temp_file.seek(0) # go to start of the stdout 82 | stdout = temp_file.read() 83 | # parse the first ready image with the dataproc version attached in labels 84 | if stdout: 85 | parsed_line = stdout.decode('utf-8').strip() # should be just one value 86 | return parsed_line 87 | 88 | raise RuntimeError("Cannot find dataproc base image: %s", image_uri) 89 | 90 | 91 | def _get_dataproc_version_from_image_family(image_family_uri): 92 | """Get Dataproc image family version from family name.""" 93 | project, image_family_name = _extract_image_name_and_project_from_family_uri(image_family_uri) 94 | command = [ 95 | "gcloud", "compute", "images", "describe-from-family", image_family_name, "--project", 
96 | project, "--format=value(labels.goog-dataproc-version)" 97 | ] 98 | 99 | # get stdout from compute images list --filters 100 | with tempfile.NamedTemporaryFile() as temp_file: 101 | pipe = subprocess.Popen(command, stdout=temp_file) 102 | pipe.wait() 103 | if pipe.returncode != 0: 104 | raise RuntimeError( 105 | "Cannot find dataproc base family image, please check and verify " 106 | "the family URI.") 107 | 108 | temp_file.seek(0) # go to start of the stdout 109 | stdout = temp_file.read() 110 | # parse the first ready image with the dataproc version attached in labels 111 | if stdout: 112 | dataproc_version = stdout.decode('utf-8').strip() # should be just one value 113 | return dataproc_version 114 | 115 | raise RuntimeError("Cannot find dataproc base image family: %s" % 116 | image_family_uri) 117 | 118 | def _extract_image_path(image_uri): 119 | """Get the partial image URI from the full image URI.""" 120 | project, image_name = _extract_image_name_and_project(image_uri) 121 | return _IMAGE_PATH.format(project, image_name) 122 | 123 | def _extract_image_family_path(image_family_uri): 124 | """Get the partial image family URI from the full image family URI.""" 125 | project, image_name = _extract_image_name_and_project_from_family_uri(image_family_uri) 126 | return _IMAGE_FAMILY_PATH.format(project, image_name) 127 | 128 | def _get_dataproc_image_path_by_version(version): 129 | """Get Dataproc base image name from version.""" 130 | # version regex already checked in arg parser 131 | parsed_version = version.split(".") 132 | major_version = parsed_version[0] 133 | if len(parsed_version) == 2: 134 | # The input version must be of format 1.5-debian10 in which case we need to 135 | # expand it to 1-5-\d+-debian10 so we can do a regexp on the minor version 136 | minor_version = parsed_version[1].split("-")[0] 137 | parsed_version[1] = parsed_version[1].replace("-", "-\d+-") 138 | filter_arg = ("labels.goog-dataproc-version ~ ^{}-{} AND NOT name ~ -eap$" 139 | " 
AND status = READY").format(parsed_version[0], 140 | parsed_version[1]) 141 | else: 142 | major_version = parsed_version[0] 143 | minor_version = parsed_version[1] 144 | # Moreover, push the filter of READY status and name not containing 'eap' to 145 | # gcloud command so we don't have to iterate the list 146 | filter_arg = ("labels.goog-dataproc-version = {}-{}-{} AND NOT name ~ -eap$" 147 | " AND status = READY").format(parsed_version[0], 148 | parsed_version[1], 149 | parsed_version[2]) 150 | command = [ 151 | "gcloud", "compute", "images", "list", "--project", "cloud-dataproc", 152 | "--filter", filter_arg, "--format", 153 | "csv[no-heading=true](name,labels.goog-dataproc-version)", 154 | "--sort-by=~creationTimestamp" 155 | ] 156 | 157 | _LOG.info("Executing command: {}".format(command)) 158 | # get stdout from compute images list --filters 159 | with tempfile.NamedTemporaryFile() as temp_file: 160 | pipe = subprocess.Popen(command, stdout=temp_file) 161 | pipe.wait() 162 | if pipe.returncode != 0: 163 | raise RuntimeError( 164 | "Cannot find dataproc base image, please check and verify " 165 | "[--dataproc-version]") 166 | 167 | temp_file.seek(0) # go to start of the stdout 168 | stdout = temp_file.read() 169 | # parse the first ready image with the dataproc version attached in labels 170 | if stdout: 171 | # in case there are multiple images 172 | parsed_lines = stdout.decode('utf-8').strip().split('\n') 173 | expected_prefix = "dataproc-{}-{}".format(major_version, minor_version) 174 | _LOG.info("Filtering images : %s", expected_prefix) 175 | image_versions=[] 176 | all_images_for_version = {} 177 | for line in parsed_lines: 178 | parsed_image = line.split(",") 179 | if len(parsed_image) == 2: 180 | parsed_image_name = parsed_image[0] 181 | if not parsed_image_name.startswith(expected_prefix): 182 | _LOG.info("Skipping non-release image %s", parsed_image_name) 183 | # Not a regular dataproc release image. Maybe a custom image with same label. 
184 | continue 185 | parsed_image_version = parsed_image[1] 186 | if parsed_image_version not in all_images_for_version: 187 | all_images_for_version[parsed_image_version] = [_IMAGE_PATH.format("cloud-dataproc", parsed_image_name)] 188 | image_versions.append(parsed_image_version) 189 | else: 190 | all_images_for_version[parsed_image_version].append(_IMAGE_PATH.format("cloud-dataproc", parsed_image_name)) 191 | 192 | _LOG.info("All Images : %s", all_images_for_version) 193 | _LOG.info("All Image-Versions : %s", image_versions) 194 | 195 | latest_available_version = image_versions[0] 196 | if (len(all_images_for_version[latest_available_version]) > 1): 197 | raise RuntimeError( 198 | "Found more than one images for latest dataproc-version={}. Images: {}".format( 199 | latest_available_version, 200 | str(all_images_for_version[latest_available_version]))) 201 | 202 | _LOG.info("Choosing image %s with version %s", all_images_for_version[image_versions[0]][0], image_versions[0]) 203 | return all_images_for_version[image_versions[0]][0], image_versions[0] 204 | 205 | raise RuntimeError( 206 | "Cannot find dataproc base image with dataproc-version=%s." 
% version) 207 | 208 | 209 | def _infer_project_id(args): 210 | if not args.project_id: 211 | args.project_id = _get_project_id() 212 | 213 | 214 | def _infer_base_image(args): 215 | # get dataproc base image from dataproc version 216 | _LOG.info("Getting Dataproc base image name...") 217 | if args.base_image_uri: 218 | args.dataproc_base_image = _extract_image_path(args.base_image_uri) 219 | args.dataproc_version = _get_dataproc_image_version(args.base_image_uri) 220 | elif args.dataproc_version: 221 | args.dataproc_base_image, args.dataproc_version = _get_dataproc_image_path_by_version( 222 | args.dataproc_version) 223 | elif args.base_image_family: 224 | args.dataproc_base_image = _extract_image_family_path(args.base_image_family) 225 | args.dataproc_version = _get_dataproc_version_from_image_family(args.base_image_family) 226 | else: 227 | raise RuntimeError( 228 | "Neither --dataproc-version nor --base-image-uri nor --source-image-family-uri is specified.") 229 | _LOG.info("Returned Dataproc base image: %s", args.dataproc_base_image) 230 | _LOG.info("Returned Dataproc version : %s", args.dataproc_version) 231 | 232 | 233 | def _infer_oauth(args): 234 | if args.oauth: 235 | args.oauth = "\n \"OAuthPath\": \"{}\",".format( 236 | os.path.abspath(args.oauth)) 237 | else: 238 | args.oauth = "" 239 | 240 | 241 | def _infer_network(args): 242 | # When the user wants to create a VM in a shared VPC, 243 | # only the subnetwork argument has to be provided whereas 244 | # the network one has to be left empty. 245 | if not args.network and not args.subnetwork: 246 | args.network = 'global/networks/default' 247 | # The --network flag requires format global/networks/, 248 | # which does not work for gcloud, here we convert it to 249 | # projects//global/networks/. 
250 | if args.network.startswith('global/networks/'): 251 | args.network = 'projects/{}/{}'.format(args.project_id, args.network) 252 | 253 | 254 | def infer_args(args): 255 | _infer_project_id(args) 256 | _infer_base_image(args) 257 | _infer_oauth(args) 258 | _infer_network(args) 259 | args.shutdown_timer_in_sec = args.shutdown_instance_timer_sec 260 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------