├── tests ├── __init__.py ├── cli │ ├── __init__.py │ └── test_cli.py ├── lib │ ├── __init__.py │ └── utils.py ├── commands │ ├── __init__.py │ └── test_validate_image.py ├── helper │ ├── __init__.py │ ├── test_manifest_reader.py │ ├── test_arg_parser.py │ ├── test_print_message.py │ └── test_docker.py └── validation_tool │ ├── __init__.py │ ├── validation_tests │ ├── __init__.py │ ├── test_check_local_job_run.py │ ├── test_check_envs.py │ ├── test_check_files.py │ └── test_check_manifest.py │ ├── test_check_inputs.py │ └── test_validation_helper.py ├── custom_image_cli ├── cli │ ├── __init__.py │ └── cli.py ├── commands │ ├── __init__.py │ ├── base_command.py │ └── validate_image.py ├── helper │ ├── __init__.py │ ├── docker_util │ │ ├── __init__.py │ │ ├── docker_helper.py │ │ └── docker_cl.py │ ├── manifest_reader.py │ ├── print_message.py │ ├── logging.py │ └── argument_parser.py ├── validation_tool │ ├── __init__.py │ ├── validation_models │ │ ├── __init__.py │ │ └── validation_models.py │ ├── validation_tests │ │ ├── __init__.py │ │ ├── base_check.py │ │ ├── check_local_job_run.py │ │ ├── check_envs.py │ │ ├── check_files.py │ │ └── check_manifest.py │ ├── check_inputs.py │ └── validation_helper.py ├── __init__.py └── __main__.py ├── .gitignore ├── NOTICE ├── pyinstaller-build.txt ├── Makefile ├── CODE_OF_CONDUCT.md ├── requirements.txt ├── installer ├── pyinstaller │ ├── INSTRUCTION.md │ ├── custom-image-validation-tool.spec │ ├── custom-image-validation-tool-win.spec │ ├── build-win.bat │ ├── build-mac.sh │ └── build-linux.sh └── assets │ ├── INSTALLATION_GUIDE.md │ └── installation ├── CONTRIBUTING.md ├── DEVELOPMENT_GUIDE.md ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/helper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/validation_tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/helper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/helper/docker_util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/validation_tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/__init__.py: 
-------------------------------------------------------------------------------- 1 | __version__ = "1.06" 2 | -------------------------------------------------------------------------------- /tests/validation_tool/validation_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | .DS_Store 4 | /venv/ 5 | -------------------------------------------------------------------------------- /custom_image_cli/validation_tool/validation_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_image_cli/validation_tool/validation_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
def verify_docker():
    """Report whether the ``docker`` executable can be found on ``PATH``.

    Prints a progress line and looks the command up with ``shutil.which``.

    Returns:
        bool: ``True`` when a ``docker`` executable was located, ``False``
        otherwise.  The lookup result used to be discarded, which left
        callers with no way to react to a missing Docker CLI.
    """
    print("... Checking if docker cli is installed", flush=True)
    return shutil.which("docker") is not None
yaml.add_constructor(ImageManifest.yaml_tag, ImageManifest.from_yaml)
# yaml.add_constructor() without a Loader argument does not register the
# constructor on yaml.SafeLoader, which is the loader yaml.safe_load() uses in
# load_yaml() below.  Register it there as well so the manifest tag can be
# resolved by the safe loader.
# NOTE(review): any other tagged classes used inside the manifest (defined in
# validation_models, not visible here) presumably need the same SafeLoader
# registration - confirm against that module.
yaml.add_constructor(ImageManifest.yaml_tag, ImageManifest.from_yaml, Loader=yaml.SafeLoader)


def load_yaml(file):
    """Parse the YAML file at ``file`` and return the loaded document.

    Args:
        file: Path of the YAML file to open for reading.

    Returns:
        Whatever ``yaml.safe_load`` builds from the file content.
    """
    with open(file, "r") as f:
        return yaml.safe_load(f)
def check_image(image, image_type, log):
    """Stop the program when no image matched the requested image type.

    Args:
        image: The matched image entry; any falsy value means "no match".
        image_type: The image type that was requested, used in the message.
        log: Object exposing ``error(message)``.

    Raises:
        SystemExit: With exit code 2 when ``image`` is falsy.
    """
    if image:
        return
    message = "No matching image with %s \'%s\' : FAIL" % ('Imagetype', image_type)
    log.error(message)
    sys.exit(2)
def print_validate_completion_message(validation_succeeded):
    """Print the overall validation verdict framed by two separator lines.

    Args:
        validation_succeeded: Truthy to print the success message, falsy to
            print the failure message.
    """
    separator = "-----------------------------------------------------------------"
    if validation_succeeded:
        verdict = "Overall Custom Image Validation Succeeded."
    else:
        verdict = "Custom Image Validation Failed. Please see individual test results above for detailed information."

    print(separator)
    print(verdict)
    print(separator)
class Log:
    """Small wrapper around the shared ``"logger"`` logger with colored output.

    Messages are formatted as ``[LEVEL] message`` with a per-level color and
    the logger level is set to ``INFO``.
    """

    def __init__(self):
        self.log = logging.getLogger("logger")
        # logging.getLogger() hands back the same logger object for the same
        # name, so creating several Log instances used to stack one more
        # stream handler each time and every message was printed repeatedly.
        # Attach the colored handler only when it is not already present.
        already_configured = any(
            isinstance(handler.formatter, colorlog.ColoredFormatter)
            for handler in self.log.handlers
        )
        if not already_configured:
            stream = logging.StreamHandler()
            formatter = colorlog.ColoredFormatter(
                "%(log_color)s[%(levelname)s]%(reset)s "
                "%(message)s",
                reset=True,
                log_colors={
                    'DEBUG': 'cyan',
                    'INFO': 'green',
                    'WARNING': 'yellow',
                    'ERROR': 'red',
                    'CRITICAL': 'red',
                })
            stream.setFormatter(formatter)
            self.log.addHandler(stream)
        self.log.setLevel(logging.INFO)

    def info(self, s):
        """Log ``s`` at INFO level."""
        self.log.info(s)

    def error(self, s):
        """Log ``s`` at ERROR level."""
        self.log.error(s)

    def warn(self, s):
        """Log ``s`` at WARNING level."""
        self.log.warning(s)
21 | 22 | If you have python3 pre-installed: 23 | ``` 24 | cd 25 | ./installer/pyinstaller/build-win.bat 26 | ``` 27 | 28 | If you don't have python3 pre-installed, you can either install it by yourself or enter python version as an 29 | input to automatically install. 30 | ``` 31 | cd 32 | ./installer/pyinstaller/build-win.bat 3.7.9 33 | ``` 34 | This may require to unblock the python-installation.exe file (The requirement differs in computers). 35 | -------------------------------------------------------------------------------- /installer/pyinstaller/custom-image-validation-tool.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | 4 | block_cipher = None 5 | 6 | 7 | a = Analysis(['../../custom_image_cli/__main__.py'], 8 | pathex=[''], 9 | binaries=[], 10 | datas=[('../../assets/image-manifest.yaml', 'assets')], 11 | hiddenimports=[], 12 | hookspath=[], 13 | runtime_hooks=[], 14 | excludes=[], 15 | win_no_prefer_redirects=False, 16 | win_private_assemblies=False, 17 | cipher=block_cipher, 18 | noarchive=False) 19 | pyz = PYZ(a.pure, a.zipped_data, 20 | cipher=block_cipher) 21 | exe = EXE(pyz, 22 | a.scripts, 23 | a.binaries, 24 | a.zipfiles, 25 | a.datas, 26 | [], 27 | name='emr-on-eks-custom-image', 28 | debug=False, 29 | bootloader_ignore_signals=False, 30 | strip=False, 31 | upx=True, 32 | upx_exclude=[], 33 | runtime_tmpdir=None, 34 | console=True ) 35 | -------------------------------------------------------------------------------- /installer/pyinstaller/custom-image-validation-tool-win.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | 4 | block_cipher = None 5 | 6 | 7 | a = Analysis(['..\\..\\custom_image_cli\\__main__.py'], 8 | pathex=[''], 9 | binaries=[], 10 | datas=[('..\\..\\assets\\image-manifest.yaml', 'assets')], 11 | hiddenimports=[], 12 | hookspath=[], 13 | runtime_hooks=[], 
class DockerCommand:
    """Runs commands against a single container created through the Docker SDK.

    The container is created on the first ``docker_run`` call and reused for
    every later call until ``close_docker`` is invoked.
    """

    def __init__(self):
        self.client = docker.from_env()
        # Created on demand by docker_run(); None until then.
        self.container = None

    def create_container(self, docker_image_uri):
        """Start a detached container from ``docker_image_uri`` running
        ``/bin/bash`` with a TTY attached, and return it."""
        return self.client.containers.run(image=docker_image_uri, command="/bin/bash", detach=True, tty=True)

    def docker_list_files(self, docker_image_uri, path):
        """Return the output of listing ``path`` inside the container.

        Runs ``ls -al`` on ``path`` and keeps the ninth column of each line
        via ``awk``.  ``path`` is interpolated into the shell command as-is.
        """
        command = ['bash', '-c', 'ls -al %s | awk \'{print $9}\'' % path]
        result = self.docker_run(docker_image_uri, command)
        return result.output

    def docker_inspect(self, docker_image_uri):
        """Return the image inspection data for ``docker_image_uri``."""
        return self.client.api.inspect_image(docker_image_uri)

    def docker_run(self, image_uri, command):
        """Execute ``command`` in the shared container and return the result.

        The container is created from ``image_uri`` on the first call only;
        later calls reuse it and ignore ``image_uri``.
        """
        if self.container is None:
            self.container = self.create_container(image_uri)
        return self.container.exec_run(command)

    def close_docker(self):
        """Stop and remove the container (if any) and close the client.

        The client is closed even when stopping or removing the container
        raises; the exception still propagates to the caller.  After a
        successful removal the container reference is cleared so a second
        call does not try to stop a container that no longer exists.
        """
        try:
            if self.container is not None:
                self.container.stop(timeout=0)
                self.container.remove()
                self.container = None
        finally:
            self.client.close()
class CheckLocalJobRun(base_check.BaseCheck):
    """Check that submits the SparkPi example job inside the image."""

    def __init__(self, image_uri, docker_cmd, log):
        """
        Args:
            image_uri: Image to run the sample job in.
            docker_cmd: Object exposing ``docker_run(image_uri, command)``.
            log: Object exposing ``info(message)`` and ``error(message)``.
        """
        self.image_uri = image_uri
        self.docker_cmd = docker_cmd
        self.log = log
        self.entry_point = 'local:///usr/lib/spark/examples/jars/spark-examples.jar'

    def check(self):
        """Run the sample Spark job and report the outcome.

        Returns:
            bool: ``True`` when the job ran successfully, ``False`` when the
            run raised ``ContainerError`` or reported a non-zero exit code.
        """
        print('... Start Running Sample Spark Job')
        # '-c' is required: without it bash treats the whole spark-submit
        # string as the name of a script file instead of a command line.
        command = ['bash', '-c',
                   'spark-submit '
                   '--deploy-mode client '
                   '--master local '
                   '--class org.apache.spark.examples.SparkPi ' + self.entry_point]
        try:
            result = self.docker_cmd.docker_run(self.image_uri, command)
        except ContainerError:
            self.log.error('Sample Spark Job Test with %s : FAIL' % self.entry_point)
            return False

        # A failed command inside the container is reported through the
        # result's exit code rather than an exception, so inspect it too.
        # Results without an integer exit code are treated as success, which
        # keeps the previous behavior for such return values.
        exit_code = getattr(result, 'exit_code', None)
        if isinstance(exit_code, int) and exit_code != 0:
            self.log.error('Sample Spark Job Test with %s : FAIL' % self.entry_point)
            return False

        self.log.info('Sample Spark Job Test with %s : PASS' % self.entry_point)
        return True
class CheckEnvs(base_check.BaseCheck):
    """Check that the image defines the environment variables the manifest
    requires, with the required values."""

    def __init__(self, env_path, env_list, env_vars, log):
        """
        Args:
            env_path: Iterable of ``"NAME=value"`` strings found in the image.
            env_list: Keys of the environment variables that must be checked.
            env_vars: Objects exposing ``key``, ``env_name`` and ``env_value``
                that describe each known environment variable.
            log: Object exposing ``info(message)`` and ``error(message)``.
        """
        self.env_path = env_path
        self.env_list = env_list
        self.env_vars = env_vars
        self.log = log

    def check(self):
        """Parse the image environment and compare it with the requirements.

        Returns:
            bool: ``True`` when every required variable has the expected value.
        """
        env_variables = {}
        for env in self.env_path:
            # Split on the first "=" only: values such as "-Dkey=value" contain
            # "=" themselves and used to break the two-way unpacking.
            var, _, val = env.partition("=")
            env_variables[var] = val
        return self.match(env_variables)

    def match(self, env_variables):
        """Log PASS/FAIL for each required variable.

        Args:
            env_variables: Mapping of variable name to value found in the image.

        Returns:
            bool: ``False`` as soon as any required variable is missing or has
            a different value (all variables are still reported), else ``True``.

        Raises:
            IndexError: When a key in ``env_list`` has no entry in ``env_vars``.
        """
        environment_test = True
        for key in self.env_list:
            env = [env_var for env_var in self.env_vars if env_var.key == key][0]
            if env.env_name in env_variables and env.env_value == env_variables[env.env_name]:
                self.log.info("%s is set with value: %s : PASS" % (env.env_name, env.env_value))
            else:
                self.log.error("%s MUST set to %s : FAIL" % (env.env_name, env.env_value))
                environment_test = False
        return environment_test

    def set_env_path(self, env_path):
        """Replace the image environment entries to check."""
        self.env_path = env_path

    def set_manifest_envs(self, env_list):
        """Replace the list of required environment variable keys."""
        self.env_list = env_list
setUp(self) -> None: 9 | self.release_name = RELEASE_NAME 10 | self.type = IMAGE_TYPE 11 | self.log = logging.Log() 12 | 13 | def test_check_version(self): 14 | self.assertIsNone(check_inputs.check_version(RELEASE_NAME, RELEASE_NAME, self.log)) 15 | expected = "ERROR:logger:No matching image with ReleaseName '%s' : FAIL" % RELEASE_NAME 16 | with self.assertRaises(SystemExit) as t, self.assertLogs(self.log.log) as m: 17 | check_inputs.check_version(None, RELEASE_NAME, self.log) 18 | self.assertEqual(t.exception.code, 2) 19 | self.assertIn(expected, m.output) 20 | 21 | def test_check_image(self): 22 | self.assertIsNone(check_inputs.check_image(self.type, self.type, self.log)) 23 | expected = "ERROR:logger:No matching image with Imagetype '%s' : FAIL" % self.type 24 | with self.assertRaises(SystemExit) as t, self.assertLogs(self.log.log) as m: 25 | check_inputs.check_image(None, self.type, self.log) 26 | self.assertEqual(t.exception.code, 2) 27 | self.assertIn(expected, m.output) 28 | 29 | -------------------------------------------------------------------------------- /tests/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | from argparse import Namespace 4 | from tests.lib.utils import IMAGE, IMAGE_TYPE, RELEASE_NAME 5 | from custom_image_cli.cli import cli 6 | 7 | 8 | class TestCli(unittest.TestCase): 9 | 10 | @mock.patch("custom_image_cli.helper.print_message.print_pre_verification_text") 11 | @mock.patch("custom_image_cli.helper.argument_parser.parse_commandline_arguments") 12 | @mock.patch("custom_image_cli.commands.validate_image.ValidateImage.run") 13 | @mock.patch("custom_image_cli.commands.validate_image.ValidateImage.initiate") 14 | @mock.patch("custom_image_cli.commands.validate_image.ValidateImage.__init__") 15 | def test_run_validate_image(self, validate_image_constructor, validate_image_initiate, validate_image_run, 16 | parse_args, print_pre): 17 | 
@echo off
REM Builds the Windows executable with PyInstaller.
REM Usage: build-win.bat [python_version]
REM When a python version is given, that installer is downloaded and run first.
set python_version=%1


echo "Making Folders"
mkdir .build_win\src
mkdir .build_win\output\amazon-emr-on-eks-custom-image-cli

echo "Copying Source"
REM /XD keeps the build folder from being copied into itself.
robocopy . .build_win\src /e /XD .build_win
cd .build_win\src
rmdir /s /q scripts
del Makefile
rmdir /s /q venv
rmdir /s /q __pycache__
cd ..

if not "%python_version%"=="" (
    echo "Installing Python3"
    curl "https://www.python.org/ftp/python/%python_version%/python-%python_version%-amd64.exe" --output python_install.exe
    python_install.exe /quiet InstallAllUsers=1 PrependPath=1 Include_test=0
)


echo "Installing Python Libraries"
py -3 -m venv venv
venv\Scripts\pip3.exe install -r src/requirements.txt

echo "Installing PyInstaller"
venv\Scripts\pip3.exe install pyinstaller

echo "Building Binary"
cd src
REM Print the spec that is actually used for this build; the message used to
REM name the non-Windows spec and nothing was printed after it.
echo "custom-image-validation-tool-win.spec content is:"
type installer\pyinstaller\custom-image-validation-tool-win.spec
..\venv\Scripts\python.exe -m PyInstaller --clean installer\pyinstaller\custom-image-validation-tool-win.spec

mkdir pyinstaller-output
mkdir pyinstaller-output\bin
robocopy /e /move dist pyinstaller-output\bin
robocopy /e installer\assets pyinstaller-output
del pyinstaller-output\installation
cd ..
robocopy /e src\pyinstaller-output output/amazon-emr-on-eks-custom-image-cli
robocopy src output/amazon-emr-on-eks-custom-image-cli README.md
"-----------------------------------------------------------------\n" \ 18 | "Overall Custom Image Validation Succeeded.\n" \ 19 | "-----------------------------------------------------------------\n" 20 | print_message.print_validate_completion_message(True) 21 | self.assertEqual(mock_stdout.getvalue(), expected) 22 | 23 | 24 | @mock.patch('sys.stdout', new_callable=io.StringIO) 25 | def test_validate_completion_msg_fail(self, mock_stdout): 26 | expected = "-----------------------------------------------------------------\n" \ 27 | "Custom Image Validation Failed. Please see individual test results above for detailed information.\n" \ 28 | "-----------------------------------------------------------------\n" 29 | print_message.print_validate_completion_message(False) 30 | self.assertEqual(mock_stdout.getvalue(), expected) 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/validation_tool/validation_tests/test_check_local_job_run.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from docker.errors import ContainerError 5 | 6 | from custom_image_cli.helper import logging 7 | from custom_image_cli.validation_tool.validation_tests.check_local_job_run import CheckLocalJobRun 8 | 9 | 10 | class TestCheckLocalSparkJob(unittest.TestCase): 11 | def setUp(self) -> None: 12 | self.log = logging.Log() 13 | 14 | @mock.patch('custom_image_cli.helper.docker_util.docker_cl.DockerCommand') 15 | def test_check(self, docker_constructor): 16 | docker_cmd = docker_constructor.return_value 17 | logger = self.log 18 | sanity_check_instance = CheckLocalJobRun('image_uri', docker_cmd, logger) 19 | 20 | docker_cmd.docker_run = mock.Mock(return_value=None) 21 | with self.assertLogs(logger.log) as t: 22 | result = sanity_check_instance.check() 23 | docker_cmd.docker_run.assert_called_once() 24 | self.assertEqual(result, 1) 25 | expected = 
'INFO:logger:Sample Spark Job Test with ' \ 26 | 'local:///usr/lib/spark/examples/jars/spark-examples.jar : PASS' 27 | self.assertIn(expected, t.output) 28 | 29 | docker_cmd.docker_run.side_effect = ContainerError(None, '', '', '', None) 30 | with self.assertLogs(logger.log) as t: 31 | result = sanity_check_instance.check() 32 | self.assertEqual(result, 0) 33 | expected = 'ERROR:logger:Sample Spark Job Test with ' \ 34 | 'local:///usr/lib/spark/examples/jars/spark-examples.jar : FAIL' 35 | self.assertIn(expected, t.output) 36 | -------------------------------------------------------------------------------- /custom_image_cli/validation_tool/validation_helper.py: -------------------------------------------------------------------------------- 1 | from custom_image_cli.validation_tool.validation_tests import check_files, check_envs, check_manifest, check_local_job_run 2 | from custom_image_cli.validation_tool import check_inputs 3 | 4 | 5 | def validate_all(inspect_result, 6 | docker_cmd, 7 | docker_image_uri, 8 | image_manifest, 9 | release_name, 10 | image_type, 11 | log): 12 | print("... 
def load_validation_info(image_manifest, release_name, image_type, log):
    """Pick the manifest entries that apply to the requested release and type.

    Args:
        image_manifest: Object exposing ``emr_releases``, ``file_structures``
            and ``env_vars``.
        release_name: Release name to look up among the manifest releases.
        image_type: Image type to look up among the release's images.
        log: Logger handed to the input checks.

    Returns:
        tuple: ``(image, file_structures, env_vars)`` where ``image`` is the
        last image of the last matching release whose type matches.
    """
    all_releases = image_manifest.emr_releases
    manifest_files = image_manifest.file_structures
    manifest_envs = image_manifest.env_vars

    # The last matching release wins; None is handed to the check when
    # nothing matched.
    release_hits = [candidate for candidate in all_releases
                    if release_name == candidate.release_name]
    chosen_release = release_hits[-1] if release_hits else None
    check_inputs.check_version(chosen_release, release_name, log)

    # Same selection rule for the image inside the chosen release.
    image_hits = [candidate for candidate in chosen_release.images
                  if image_type == candidate.image_type]
    chosen_image = image_hits[-1] if image_hits else None
    check_inputs.check_image(chosen_image, image_type, log)

    return chosen_image, manifest_files, manifest_envs
__pycache__ 26 | cd .. 27 | cp -r ./src/* ./output/amazon-emr-on-eks-custom-image-cli-src 28 | 29 | echo "Installing Python3" 30 | curl "https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz" --output python.tgz 31 | tar -xzf python.tgz 32 | cd Python-$python_version 33 | ./configure --enable-shared 34 | make -j8 35 | make install 36 | cd .. 37 | 38 | echo "Installing Python Libraries" 39 | python3 -m venv venv 40 | ./venv/bin/pip3 install --upgrade pip 41 | ./venv/bin/pip3 install -r src/requirements.txt 42 | 43 | echo "Installing PyInstaller" 44 | ./venv/bin/pip3 install -r src/pyinstaller-build.txt 45 | 46 | echo "Building Binary" 47 | cd src 48 | echo "custom-image-validation-tool.spec content is:" 49 | cat installer/pyinstaller/custom-image-validation-tool.spec 50 | ../venv/bin/python3 -m PyInstaller -F --clean installer/pyinstaller/custom-image-validation-tool.spec 51 | 52 | mkdir -p pyinstaller-output 53 | mkdir -p pyinstaller-output/dist 54 | mv dist/* pyinstaller-output/dist 55 | cp installer/assets/* pyinstaller-output 56 | chmod 755 pyinstaller-output/installation 57 | 58 | echo "install script content is:" 59 | cat pyinstaller-output/installation 60 | echo "Copying Binary" 61 | cd .. 62 | cp -r src/pyinstaller-output/* output/pyinstaller-output 63 | 64 | echo "Packaging Binary" 65 | cd output 66 | cd pyinstaller-output 67 | cd dist 68 | cd .. 69 | zip -r ../"$binary_zip_filename" ./* 70 | cd .. 
#!/bin/sh
# Build the Linux binary with PyInstaller and package it into a zip.
#   $1  name of the zip file to produce
#   $2  Python version to build from source (optional, defaults to 3.7.9)
binary_zip_filename=$1
python_version=$2

if [ "$python_version" = "" ]; then
    python_version="3.7.9";
fi


# Enabled only after the positional arguments are read, so a missing $2
# falls back to the default above instead of aborting under `set -u`.
set -eu

export LD_LIBRARY_PATH=/lib:/usr/lib:/usr/local/lib
# -y keeps yum from waiting on a confirmation prompt in unattended builds.
yum install -y libffi-devel
yum install -y zlib-devel openssl-devel

echo "Making Folders"
mkdir -p .build_linux/src
mkdir -p .build_linux/output/amazon-emr-on-eks-custom-image-test-src
mkdir -p .build_linux/output/pyinstaller-output
cd .build_linux

echo "Copying Source"
# The [!.]* glob skips dot-entries, so .build_linux itself is not copied.
cp -r ../[!.]* ./src
cd src
rm -f Makefile
cd ..
cp -r ./src/* ./output/amazon-emr-on-eks-custom-image-test-src

echo "Installing Python3"
curl "https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz" --output python.tgz
tar -xzf python.tgz
cd "Python-$python_version"
./configure --enable-shared
make -j8
make install
cd ..

echo "Installing Python Libraries"
python3 -m venv venv
./venv/bin/pip3 install --upgrade pip
./venv/bin/pip3 install -r src/requirements.txt

echo "Installing PyInstaller"
./venv/bin/pip3 install -r src/pyinstaller-build.txt

echo "Building Binary"
cd src
echo "custom-image-validation-tool.spec content is:"
cat installer/pyinstaller/custom-image-validation-tool.spec
../venv/bin/python3 -m PyInstaller -F --clean installer/pyinstaller/custom-image-validation-tool.spec

mkdir -p pyinstaller-output
mkdir -p pyinstaller-output/dist
mv dist/* pyinstaller-output/dist
cp installer/assets/* pyinstaller-output
chmod 755 pyinstaller-output/installation

echo "install script content is:"
cat pyinstaller-output/installation
echo "Copying Binary"
cd ..
cp -r src/pyinstaller-output/* output/pyinstaller-output

echo "Packaging Binary"
yum install -y zip
cd output
cd pyinstaller-output
# Under `set -e` this cd/cd .. pair aborts the build if dist/ is missing.
cd dist
cd ..
zip -r ../"$binary_zip_filename" ./*
cd ..
class CheckFiles(base_check.BaseCheck):
    """Check that the files required by the manifest exist in the image."""

    def __init__(self, file_structure_list, file_structure, docker_cmd, docker_image_uri, log):
        """Store the inputs of the check.

        Args:
            file_structure_list: names of the file structures to verify.
            file_structure: file-structure definitions; each exposes ``name``,
                ``relative_location`` and ``file_prefixes``.
            docker_cmd: object providing ``docker_list_files(image_uri, path)``.
            docker_image_uri: URI of the image under test.
            log: logger exposing ``info`` and ``error``.
        """
        self.file_structure_list = file_structure_list
        self.file_structure = file_structure
        self.docker_cmd = docker_cmd
        self.docker_image_uri = docker_image_uri
        self.log = log

    def check(self):
        """Verify every requested file structure.

        Returns:
            True when all structures are defined and all their file prefixes
            are found, False otherwise. Every structure is examined even
            after a failure so that all problems are logged.
        """
        file_system_test = True
        for file_structure_name in self.file_structure_list:
            # First definition with a matching name wins.
            file_structure = next(
                (structure for structure in self.file_structure
                 if structure.name == file_structure_name),
                None)
            if file_structure is None:
                # Report an undefined name as a failed check rather than
                # crashing with an IndexError.
                self.log.error("File Structure %s is not defined : FAIL" % file_structure_name)
                file_system_test = False
                continue
            if not self.match(file_structure.name,
                              file_structure.relative_location,
                              file_structure.file_prefixes):
                file_system_test = False
        return file_system_test

    def match(self, name, path, file_prefixes):
        """Check that each prefix matches at least one file listed under ``path``.

        Args:
            name: file-structure name, used only in the success message.
            path: location inside the image to list.
            file_prefixes: prefixes that must each start some listed entry.

        Returns:
            True when every prefix is matched; each missing prefix is logged
            as an error.
        """
        docker_log = self.docker_cmd.docker_list_files(self.docker_image_uri, path)
        # NOTE(review): the first output line is discarded - presumably a
        # header emitted by the listing command; confirm against
        # docker_list_files.
        files = docker_log.decode().split('\n')[1:]

        local_test_pass = True
        for prefix in file_prefixes:
            if not any(entry.startswith(prefix) for entry in files):
                self.log.error("%s MUST be in %s : FAIL" % (prefix, path))
                local_test_pass = False

        if local_test_pass:
            self.log.info("File Structure Test for %s in %s: PASS" % (name, path))
        return local_test_pass

    def set_file_structure_list(self, file_structure_list):
        """Replace the names of the file structures to verify."""
        self.file_structure_list = file_structure_list

    def set_file_structure(self, file_structure):
        """Replace the file-structure definitions."""
        self.file_structure = file_structure

    def set_image_uri(self, image_uri):
        """Replace the URI of the image under test."""
        self.docker_image_uri = image_uri
class ArgsParser(argparse.ArgumentParser):
    """Argument parser that shows the full help text on a usage error."""

    def error(self, msg):
        """Write ``msg`` to stderr, print the help text and exit with status 2."""
        sys.stderr.write('Error: %s \n' % msg)
        self.print_help()
        sys.exit(2)


def parse_commandline_arguments(args=None):
    """Parse the command line of the CLI.

    Args:
        args: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The parsed namespace. ``command`` holds the selected sub-command name.
    """
    if args is None:
        args = sys.argv[1:]

    main_parser = ArgsParser(prog="emr-on-eks-custom-image",
                             formatter_class=argparse.RawTextHelpFormatter)
    main_parser.add_argument('--version', action='version',
                             version='Amazon EMR on EKS Custom Image CLI '
                                     '\nVersion: {version}'.format(version=__version__))
    subparsers = main_parser.add_subparsers(dest="command")

    # Registers the sub-command on `subparsers`; the returned parser is not
    # needed here.
    parse_validate_image(subparsers)
    return main_parser.parse_args(args)


def parse_validate_image(subparsers):
    """Register the ``validate-image`` sub-command and return its parser.

    No argparse default is set for ``--image-type``; when the option is
    omitted the parsed value is ``None``.
    """
    validate_image_parser = subparsers.add_parser(name="validate-image",
                                                  formatter_class=argparse.RawTextHelpFormatter)
    validate_image_parser.add_argument('--version', action='version',
                                       version='%(prog)s \nVersion: {version}'.format(version=__version__))
    validate_image_parser.add_argument("-i", "--local-image-uri",
                                       help="specifies the name of image uri",
                                       required=True)
    validate_image_parser.add_argument("-r", "--release-name",
                                       help="specifies the release name of the image. e.g. emr-5.32.0",
                                       required=True)
    validate_image_parser.add_argument("-t", "--image-type",
                                       help="specifies the image runtime type. e.g. spark \ndefault runtime type is "
                                            "spark")
    return validate_image_parser
class CheckManifest(base_check.BaseCheck):
    """Compare the image's user, working directory and entrypoint with the manifest."""

    def __init__(self, inspect_result, manifest_config, log):
        """Store the inputs of the check.

        Args:
            inspect_result: mapping with ``Id``, ``Created`` and a ``Config``
                mapping holding ``User``, ``WorkingDir`` and ``Entrypoint``.
            manifest_config: expected values, exposing ``user``,
                ``working_dir`` and ``entrypoint``.
            log: logger exposing ``info`` and ``error``.
        """
        self.inspect_result = inspect_result
        self.manifest_config = manifest_config
        self.log = log

    def check(self):
        """Run the three comparisons, logging PASS/FAIL for each.

        Returns:
            True when user, working directory and entrypoint all match the
            manifest, False otherwise. All three are always evaluated.
        """
        image_id = self.inspect_result['Id'].split(":")[1]

        self.log.info("Image ID: %s" % image_id)
        self.log.info("Created On: %s" % self.inspect_result['Created'])

        config = self.inspect_result['Config']

        user_ok = self._compare("Default User", config['User'],
                                self.manifest_config.user)
        working_dir_ok = self._compare("Working Directory", config['WorkingDir'],
                                       self.manifest_config.working_dir)

        # Entrypoint is null/empty when the image defines none; treat that as
        # a mismatch instead of failing on the [0] lookup.
        entrypoint_cmd = config['Entrypoint']
        entrypoint = entrypoint_cmd[0] if entrypoint_cmd else None
        entrypoint_ok = self._compare("Entrypoint", entrypoint,
                                      self.manifest_config.entrypoint)

        return user_ok and working_dir_ok and entrypoint_ok

    def _compare(self, label, actual, expected):
        """Log PASS or FAIL for one setting and return whether it matched."""
        if actual == expected:
            self.log.info("%s Set to %s : PASS" % (label, expected))
            return True
        self.log.error("%s MUST be %s. Set to %s : FAIL" % (label, expected, actual))
        return False

    def set_inspect_result(self, inspect):
        """Replace the inspect result to validate."""
        self.inspect_result = inspect

    def set_manifest_config(self, manifest_config):
        """Replace the expected manifest configuration."""
        self.manifest_config = manifest_config
class TestManifest(unittest.TestCase):
    """Tests for CheckManifest.check() log output and result."""

    def setUp(self) -> None:
        # Local import keeps this block self-contained.
        import copy

        self.log = logging.Log()
        # Work on a private copy: the test below edits the inspect data in
        # place and must not alter the INSPECT fixture shared by other tests.
        self.inspect = copy.deepcopy(INSPECT)
        self.manifest_config = ManifestConfig("entrypoint", "user", "workingdir")

    def test_check_manifest(self):
        # Matching configuration: every comparison passes.
        with self.assertLogs(self.log.log) as captured:
            manifest_check_instance = CheckManifest(self.inspect, self.manifest_config, self.log)
            result = manifest_check_instance.check()
        self.assertEqual(result, True)
        expected = ['INFO:logger:Image ID: %s' % self.inspect['Id'].split(":")[1],
                    'INFO:logger:Created On: %s' % self.inspect['Created'],
                    'INFO:logger:Default User Set to %s : PASS' % self.manifest_config.user,
                    'INFO:logger:Working Directory Set to %s : PASS' % self.manifest_config.working_dir,
                    'INFO:logger:Entrypoint Set to %s : PASS' % self.manifest_config.entrypoint]
        self.assertEqual(expected, captured.output)

        # Mismatching configuration: every comparison fails.
        self.inspect['Config']['User'] = 'other'
        self.inspect['Config']['WorkingDir'] = 'other'
        self.inspect['Config']['Entrypoint'][0] = 'other'
        with self.assertLogs(self.log.log) as captured:
            manifest_check_instance.set_inspect_result(self.inspect)
            result = manifest_check_instance.check()
        self.assertEqual(result, False)

        expected = ['INFO:logger:Image ID: %s' % self.inspect['Id'].split(":")[1],
                    'INFO:logger:Created On: %s' % self.inspect['Created'],
                    'ERROR:logger:Default User MUST be %s. Set to %s : FAIL'
                    % (self.manifest_config.user, self.inspect['Config']['User']),
                    'ERROR:logger:Working Directory MUST be %s. Set to %s : FAIL'
                    % (self.manifest_config.working_dir, self.inspect['Config']['WorkingDir']),
                    'ERROR:logger:Entrypoint MUST be %s. Set to %s : FAIL'
                    % (self.manifest_config.entrypoint, self.inspect['Config']['Entrypoint'][0])]
        self.assertEqual(expected, captured.output)
if getattr(sys, 'frozen', False):
    # Frozen executable: assets are read from the sys._MEIPASS directory.
    IMAGE_MANIFEST_YAML = sys._MEIPASS + "/assets/image-manifest.yaml"
else:
    curr_dir = os.path.dirname(__file__)
    IMAGE_MANIFEST_YAML = os.path.join(curr_dir, "../../assets/image-manifest.yaml")


class ValidateImage(base_command.BaseCommand):
    """The ``validate-image`` command: inspect a local image and validate it."""

    def __init__(self):
        self.args = None
        self.log = None
        self.test_num = None
        self.image_manifest = None
        self.docker_cmd = None
        self.inspect_result = None

    def initiate(self, args, log, test_num=3):
        """Load the manifest, connect to docker and inspect the image.

        Args:
            args: parsed arguments; ``local_image_uri`` and ``image_type``
                are read here.
            log: logger exposing ``error``.
            test_num: stored on the instance; not used in this method.

        Exits the process with status 2 when the manifest cannot be loaded,
        docker is unavailable, or the image cannot be inspected.
        """
        self.args = args
        self.log = log
        self.test_num = test_num
        # load image manifest
        try:
            self.image_manifest = manifest_reader.load_yaml(IMAGE_MANIFEST_YAML)
        except Exception as e:
            print(e)
            self.log.error("image-manifest.yaml doesn't exist and is required.")
            sys.exit(2)

        # initialize docker
        try:
            docker_helper.verify_docker()
            self.docker_cmd = docker_cl.DockerCommand()
        except Exception:
            self.log.error("docker cli doesn't exist but is required.")
            sys.exit(2)

        # inspect image
        try:
            self.inspect_result = self.docker_cmd.docker_inspect(args.local_image_uri)
        except Exception:
            self.log.error("No such image found.")
            sys.exit(2)

        # set default runtime image type to spark
        if self.args.image_type is None:
            self.args.image_type = 'spark'

    def run(self):
        """Run all validations and print the completion message."""
        try:
            validation_succeeded = validation_helper.validate_all(self.inspect_result,
                                                                  self.docker_cmd,
                                                                  self.args.local_image_uri,
                                                                  self.image_manifest,
                                                                  self.args.release_name,
                                                                  self.args.image_type,
                                                                  self.log)
        finally:
            # Release the docker client even when a validation raises.
            self.docker_cmd.close_docker()
        print_validate_completion_message(validation_succeeded)

    def set_args(self, args):
        """Replace the parsed arguments."""
        self.args = args

    def set_docker_command(self, docker_command):
        """Replace the docker command object."""
        self.docker_cmd = docker_command
class testValidateImage(unittest.TestCase):
    """Tests for ValidateImage.run() and ValidateImage.initiate()."""

    # mock.patch decorators are applied bottom-up: the lowest decorator
    # supplies the first mock argument after ``self``.
    @mock.patch("custom_image_cli.helper.docker_util.docker_cl.DockerCommand.close_docker")
    @mock.patch("custom_image_cli.helper.docker_util.docker_cl.DockerCommand.__init__")
    @mock.patch("custom_image_cli.commands.validate_image.ValidateImage.initiate")
    @mock.patch("custom_image_cli.validation_tool.validation_helper.validate_all")
    def test_run(self, validate_all, initiate, docker_cmd_constructor, close_docker):
        """run() returns None and calls validate_all exactly once."""
        docker_cmd_constructor.return_value = None
        close_docker.return_value = None
        args = Namespace(local_image_uri=IMAGE, release_name=RELEASE_NAME, image_type=IMAGE_TYPE)

        validate_all.return_value = 3
        validate_image_instance = ValidateImage()
        initiate.return_value = None
        validate_image_instance.set_args(args)
        # The patched constructor mock stands in for the docker command object.
        validate_image_instance.set_docker_command(docker_cmd_constructor)

        self.assertIsNone(validate_image_instance.run())

        validate_all.assert_called_once()

    @mock.patch("custom_image_cli.helper.docker_util.docker_helper.verify_docker")
    @mock.patch("custom_image_cli.helper.docker_util.docker_cl.DockerCommand.docker_inspect")
    @mock.patch("custom_image_cli.helper.docker_util.docker_cl.DockerCommand.__init__")
    @mock.patch("custom_image_cli.helper.manifest_reader.load_yaml")
    @mock.patch("custom_image_cli.helper.logging.Log.info")
    @mock.patch("custom_image_cli.helper.logging.Log.error")
    @mock.patch("custom_image_cli.helper.logging.Log.__init__")
    def test_initialize(self, logger, log_error, log_info,
                        load_yaml, docker_cmd_constructor, docker_inspect, verify_docker):
        """initiate() succeeds with working mocks and exits with 2 on each failure."""
        validate_image_instance = ValidateImage()

        logger.return_value = None
        log_error.return_value = None
        log_info.return_value = None

        load_yaml.return_value = dict()
        docker_cmd_constructor.return_value = None
        args = Namespace(local_image_uri=IMAGE, release_name=RELEASE_NAME, image_type=IMAGE_TYPE)

        # Happy path: nothing raises.
        self.assertIsNone(validate_image_instance.initiate(args, logger))
        load_yaml.assert_called_once()

        # docker verification fails
        verify_docker.side_effect = Exception()
        with self.assertRaises(SystemExit) as t:
            validate_image_instance.initiate(args, logger)
        self.assertEqual(t.exception.code, 2)

        # image inspection fails
        verify_docker.side_effect = None
        docker_inspect.side_effect = Exception()
        with self.assertRaises(SystemExit) as t:
            validate_image_instance.initiate(args, logger)
        self.assertEqual(t.exception.code, 2)

        # manifest loading fails (docker_inspect is still failing here, but
        # the manifest is loaded first in initiate())
        load_yaml.side_effect = Exception()
        with self.assertRaises(SystemExit) as t:
            validate_image_instance.initiate(args, logger)
        self.assertEqual(t.exception.code, 2)
13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /tests/validation_tool/test_validation_helper.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import io 3 | from unittest import mock 4 | from tests.lib.utils import INSPECT 5 | from custom_image_cli.validation_tool import validation_helper 6 | from custom_image_cli.validation_tool.validation_models.validation_models import \ 7 | ImageDetail, ImageManifest, EmrRelease 8 | 9 | 10 | class TestValidationHelper(unittest.TestCase): 11 | def setUp(self) -> None: 12 | self.inspect = INSPECT 13 | self.manifest = ImageManifest([EmrRelease("release_name", [ImageDetail("image_type", None, [], [])])], [], []) 14 | 15 | @mock.patch('sys.stdout', new_callable=io.StringIO) 16 | @mock.patch('custom_image_cli.validation_tool.validation_helper.load_validation_info') 17 | 
@mock.patch("custom_image_cli.validation_tool.validation_tests.check_local_job_run.CheckLocalJobRun.check") 18 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_manifest.CheckManifest.check") 19 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_manifest.CheckManifest.__init__") 20 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_files.CheckFiles.check") 21 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_files.CheckFiles.__init__") 22 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_envs.CheckEnvs.check") 23 | @mock.patch("custom_image_cli.validation_tool.validation_tests.check_envs.CheckEnvs.__init__") 24 | def test_validate_all(self, check_envs_constructor, check_envs, check_files_constructor, 25 | check_files, check_manifest_constructor, 26 | check_manifest, check_local_job_run, load_info, mock_stdout): 27 | check_envs_constructor.return_value = None 28 | check_envs.return_value = True 29 | check_files_constructor.return_value = None 30 | check_files.return_value = True 31 | check_manifest_constructor.return_value = None 32 | check_manifest.return_value = True 33 | check_local_job_run.return_value = True 34 | load_info.return_value = ImageDetail("image_type", None, [], []), [], [] 35 | 36 | actual = validation_helper.validate_all(self.inspect, "docker_cmd", "docker_image_uri", 37 | self.manifest, "release_name", "image_type", "log") 38 | self.assertEqual(actual, True) 39 | 40 | check_manifest.assert_called_once() 41 | check_envs.assert_called_once() 42 | check_files.assert_called_once() 43 | check_local_job_run.assert_called_once() 44 | 45 | expected = "... 
import unittest
import io
import docker
import shutil
import warnings
from unittest import mock
from custom_image_cli.helper.docker_util.docker_cl import DockerCommand
from custom_image_cli.helper.docker_util.docker_helper import verify_docker
from docker.models.containers import Container


class TestDockers(unittest.TestCase):
    """Unit tests for ``DockerCommand`` and ``verify_docker``.

    Every docker SDK entry point touched by the tests is replaced with a
    mock. All module-level patches are scoped to a single test so that the
    fakes never leak into other test modules run in the same process.
    """

    def setUp(self) -> None:
        """Build a ``DockerCommand`` while ``docker.from_env`` is patched."""
        warnings.filterwarnings(action="ignore", message="unclosed", category=ResourceWarning)
        # Patch through mock.patch.object (instead of assigning to the module
        # attribute) so the real docker.from_env is restored after each test.
        from_env_patcher = mock.patch.object(
            docker, "from_env", return_value=docker.client.DockerClient())
        from_env = from_env_patcher.start()
        self.addCleanup(from_env_patcher.stop)

        self.docker_cmd = DockerCommand()
        from_env.assert_called_once()

    def _attach_fake_container(self) -> None:
        """Stub ``create_container`` and store its result as the active container."""
        self.docker_cmd.create_container = mock.Mock(return_value=Container())
        self.docker_cmd.container = self.docker_cmd.create_container()

    @mock.patch('sys.stdout', new_callable=io.StringIO)
    def test_verify_docker(self, mock_stdout):
        """verify_docker prints its progress line and looks up the docker binary."""
        # Scoped patch: shutil.which is restored as soon as the block exits.
        with mock.patch.object(shutil, "which") as which:
            verify_docker()
        expected = "... Checking if docker cli is installed\n"
        self.assertEqual(expected, mock_stdout.getvalue())
        which.assert_called_once_with("docker")

    def test_docker_inspect(self):
        """docker_inspect returns whatever the low-level inspect_image call returns."""
        self.docker_cmd.client.api.inspect_image = mock.Mock(return_value=dict())
        actual = self.docker_cmd.docker_inspect("image")
        self.assertEqual(actual, {})
        self.docker_cmd.client.api.inspect_image.assert_called()

    @mock.patch.object(docker.models.containers.ContainerCollection, "run")
    def test_docker_container(self, run):
        """create_container hands back the Container produced by containers.run."""
        run.return_value = Container()
        actual = self.docker_cmd.create_container("image")
        self.assertIsInstance(actual, Container)
        # `run` is the class-level mock that client.containers.run resolves to.
        run.assert_called_once()

    def test_docker_run(self):
        """docker_run returns the ExecResult of the container's exec_run."""
        self._attach_fake_container()
        exec_result = docker.models.containers.ExecResult(exit_code=0, output=b'something')
        self.docker_cmd.container.exec_run = mock.Mock(return_value=exec_result)

        actual = self.docker_cmd.docker_run("image", "command")

        self.assertEqual(actual, docker.models.containers.ExecResult(exit_code=0, output=b'something'))
        self.docker_cmd.container.exec_run.assert_called_once()

    def test_docker_list(self):
        """docker_list_files returns the raw bytes output of exec_run."""
        self._attach_fake_container()
        self.docker_cmd.create_container.assert_called_once()

        exec_result = docker.models.containers.ExecResult(exit_code=0, output=b'something')
        self.docker_cmd.container.exec_run = mock.Mock(return_value=exec_result)

        actual = self.docker_cmd.docker_list_files('somathing', '/usr/bin')

        self.assertIsInstance(actual, bytes)
        self.assertEqual(actual, b'something')
        self.docker_cmd.container.exec_run.assert_called_once()

    def test_docker_close(self):
        """close_docker stops and removes the container, then closes the client."""
        self._attach_fake_container()
        self.docker_cmd.create_container.assert_called_once()

        self.docker_cmd.container.stop = mock.Mock()
        self.docker_cmd.container.remove = mock.Mock()
        self.docker_cmd.client.close = mock.Mock()

        actual = self.docker_cmd.close_docker()

        self.assertIsNone(actual)
        self.docker_cmd.container.stop.assert_called_once()
        self.docker_cmd.container.remove.assert_called_once()
        self.docker_cmd.client.close.assert_called_once()
By default, the directory 32 | used is: /usr/local/bin 33 | EOF 34 | } 35 | 36 | parse_commandline() { 37 | while test $# -gt 0 38 | do 39 | key="$1" 40 | case "$key" in 41 | -i|--install-dir) 42 | PARSED_INSTALL_DIR="$2" 43 | shift 44 | ;; 45 | -b|--bin-dir) 46 | PARSED_BIN_DIR="$2" 47 | shift 48 | ;; 49 | -u|--update) 50 | PARSED_UPGRADE="yes" 51 | ;; 52 | -h|--help) 53 | usage 54 | exit 0 55 | ;; 56 | *) 57 | die "Got an unexpected argument: $1" 58 | ;; 59 | esac 60 | shift 61 | done 62 | } 63 | 64 | set_global_vars() { 65 | ROOT_INSTALL_DIR=${PARSED_INSTALL_DIR:-/usr/local/amazon-emr-on-eks-custom-image-cli} 66 | BIN_DIR=${PARSED_BIN_DIR:-/usr/local/bin} 67 | UPGRADE=${PARSED_UPGRADE:-no} 68 | 69 | EXE_NAME="emr-on-eks-custom-image" 70 | TOOL_NAME="Amazon EMR on EKS Custom Image CLI" 71 | INSTALLER_DIR="$( cd "$( dirname "$0" )" >/dev/null 2>&1 && pwd )" 72 | INSTALLER_DIST_DIR="$INSTALLER_DIR/dist" 73 | INSTALLER_EXE="$INSTALLER_DIST_DIR/$EXE_NAME" 74 | EXE_VERSION=$($INSTALLER_EXE --version | cut -d ' ' -f 2) 75 | 76 | 77 | INSTALL_DIR="$ROOT_INSTALL_DIR/$EXE_VERSION" 78 | INSTALL_DIR="$INSTALL_DIR" 79 | INSTALL_DIST_DIR="$INSTALL_DIR/dist" 80 | INSTALL_BIN_DIR="$INSTALL_DIR/bin" 81 | INSTALL_EXE="$INSTALL_BIN_DIR/$EXE_NAME" 82 | 83 | CURRENT_INSTALL_DIR="$ROOT_INSTALL_DIR/current" 84 | CURRENT_EXE="$CURRENT_INSTALL_DIR/bin/$EXE_NAME" 85 | 86 | BIN_EXE="$BIN_DIR/$EXE_NAME" 87 | } 88 | 89 | create_install_dir() { 90 | mkdir -p "$INSTALL_DIR" || exit 1 91 | { 92 | setup_install_dist && 93 | setup_install_bin && 94 | create_current_symlink 95 | } || { 96 | rm -rf "$INSTALL_DIR" 97 | exit 1 98 | } 99 | } 100 | 101 | check_preexisting_install() { 102 | if [ -L "$CURRENT_INSTALL_DIR" ] && [ "$UPGRADE" = "no" ] 103 | then 104 | die "Found preexisting $TOOL_NAME installation: $CURRENT_INSTALL_DIR. Please rerun install script with --update flag." 105 | fi 106 | if [ -d "$INSTALL_DIR" ] 107 | then 108 | echo "Found same $TOOL_NAME version: $INSTALL_DIR. 
import yaml


class ImageManifest(yaml.YAMLObject):
    """Top-level manifest node, tagged ``!ImageManifest`` in YAML."""

    yaml_tag = u'!ImageManifest'
    yaml_loader = yaml.SafeLoader

    # emr_releases: list['EmrRelease'], files_structures: list['FileStructure'], env_vars: list['EnvironmentVariable']
    def __init__(self, emr_releases, files_structures, env_vars):
        self.emr_releases = emr_releases
        # The parameter is spelled `files_structures`; the attribute is `file_structures`.
        self.file_structures = files_structures
        self.env_vars = env_vars

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        return {'EmrReleases': self.emr_releases,
                'FileStructures': self.file_structures,
                'EnvironmentVariables': self.env_vars}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        # Emit the mapping directly; overwriting data.__dict__ would strip the
        # object's Python attributes as a side effect of dumping it.
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        # deep=True constructs nested nodes before the fields are read.
        data = loader.construct_mapping(node, deep=True)
        return cls(data['EmrReleases'], data['FileStructures'], data['EnvironmentVariables'])


class EmrRelease(yaml.YAMLObject):
    """A release entry and its list of images, tagged ``!EmrRelease``."""

    yaml_tag = u'!EmrRelease'
    yaml_loader = yaml.SafeLoader

    # release_name: str, images: list['ImageDetail']
    def __init__(self, release_name, images):
        self.release_name = release_name
        self.images = images

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        return {'ReleaseName': self.release_name,
                'Images': self.images}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        data = loader.construct_mapping(node, deep=True)
        return cls(data['ReleaseName'], data['Images'])


class EnvironmentVariable(yaml.YAMLObject):
    """An environment-variable entry, tagged ``!EnvironmentVariable``."""

    yaml_tag = u'!EnvironmentVariable'
    yaml_loader = yaml.SafeLoader

    # key: str, env_name: str, env_value: str
    def __init__(self, key, env_name, env_value):
        self.key = key
        self.env_name = env_name
        self.env_value = env_value

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        return {'Key': self.key,
                'EnvName': self.env_name,
                'EnvValue': self.env_value}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        data = loader.construct_mapping(node, deep=True)
        return cls(data['Key'], data['EnvName'], data['EnvValue'])


class FileStructure(yaml.YAMLObject):
    """A file-structure entry, tagged ``!FileStructure``."""

    yaml_tag = u'!FileStructure'
    yaml_loader = yaml.SafeLoader

    # name: str, relative_location: str, file_prefixes: list[str]
    def __init__(self, name, relative_location, file_prefixes):
        self.name = name
        self.relative_location = relative_location
        self.file_prefixes = file_prefixes

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        return {'Name': self.name,
                'RelativeLocation': self.relative_location,
                'FilePrefixes': self.file_prefixes}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        data = loader.construct_mapping(node, deep=True)
        return cls(data['Name'], data['RelativeLocation'], data['FilePrefixes'])


class ImageDetail(yaml.YAMLObject):
    """Per-image details of a release, tagged ``!ImageDetail``."""

    yaml_tag = u'!ImageDetail'
    yaml_loader = yaml.SafeLoader

    # image_type: str, manifest_config: 'ManifestConfig', env_vars: list[str], file_structures: list[str]
    def __init__(self, image_type, manifest_config, env_vars, file_structures):
        self.image_type = image_type
        self.manifest_config = manifest_config
        self.env_vars = env_vars
        self.file_structures = file_structures

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        # These keys are singular, unlike the plural keys used by ImageManifest.
        return {'ImageType': self.image_type,
                'ManifestConfig': self.manifest_config,
                'EnvironmentVariable': self.env_vars,
                'FileStructure': self.file_structures}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        data = loader.construct_mapping(node, deep=True)
        return cls(data['ImageType'], data['ManifestConfig'],
                   data['EnvironmentVariable'], data['FileStructure'])


class ManifestConfig(yaml.YAMLObject):
    """Entrypoint, user and working-directory settings, tagged ``!ManifestConfig``."""

    yaml_tag = u'!ManifestConfig'
    yaml_loader = yaml.SafeLoader

    # entrypoint: str, user: str, working_dir: str
    def __init__(self, entrypoint, user, working_dir):
        self.entrypoint = entrypoint
        self.user = user
        self.working_dir = working_dir

    def to_dict(self):
        """Return the attributes keyed by their YAML field names."""
        return {'Entrypoint': self.entrypoint,
                'User': self.user,
                'WorkingDir': self.working_dir}

    @classmethod
    def to_yaml(cls, dumper, data):
        """Represent *data* as a tagged mapping without mutating the instance."""
        return dumper.represent_mapping(cls.yaml_tag, data.to_dict())

    @classmethod
    def from_yaml(cls, loader, node):
        """Build an instance from a tagged mapping node; KeyError if a field is missing."""
        data = loader.construct_mapping(node, deep=True)
        return cls(data['Entrypoint'], data['User'], data['WorkingDir'])
18 | 19 | ### Create Virtual Environment 20 | To avoid interfering with the global Python environment, create a virtual environment for this tool 21 | under the current folder: 22 | 23 | ```python3 -m venv <venv-path>``` 24 | 25 | *Note: You can change the path for your virtual env to whatever you want, but be careful of the slight difference of 26 | the path in Mac and Windows.* 27 | 28 | To activate the virtual environment, run the following command: 29 | 30 | * For Mac/Unix Users, run ```source <venv-path>/bin/activate``` 31 | 32 | * For Windows Users, run ```C:\> <venv-path>\Scripts\activate.bat``` 33 | 34 | To deactivate the venv, type in the shell: ``` deactivate ```. 35 | 36 | ### Install Required Dependencies. 37 | 38 | To ensure that all the required dependencies are successfully installed, run: 39 | ``` 40 | pip3 install -r requirements.txt 41 | ``` 42 | 43 | ### Set Python Path 44 | 45 | To avoid relative import errors, set the Python path to the current package folder: 46 | 47 | ```export PYTHONPATH=$PYTHONPATH:`pwd` ``` in linux/macOS 48 | 49 | or 50 | 51 | ```set PYTHONPATH=<path-to-package-folder>``` in windows 52 | 53 | ## Validate Custom Image 54 | In the root directory, you can directly use the python3 command to run the validation tool. 55 | 56 | Then run the command: 57 | 58 | ``` 59 | python3 custom_image_cli validate-image -i <image-uri> -r <release-name> [-t <image-type>] 60 | ``` 61 | 62 | -i specifies the local image URI that needs to be validated; this can be the image URI or any name/tag you defined for your image. 63 | 64 | -r specifies the exact release version of the EMR base image used to generate the customized image. For example, if the custom image was developed using EMR base image with release version 5.32.0, then the parameter should specify emr-5.32.0. 65 | 66 | -t specifies the image type. If this is a spark image, just input spark. The default value is `spark` and the current version only supports spark runtime images. 67 | 68 | After successfully running the tool, the log info will show test results. 
If the image doesn't meet necessary configuration requirements, you will see error messages that identify what is missing. 69 | 70 | #### Basic Test 71 | 72 | The [basic test](custom_image_cli/validation_tool/validation_tests/check_manifest.py) ensures the image contains expected configuration. The following parameters are verified in this test: 73 | 74 | * `UserName` 75 | * `WorkingDir` 76 | * `EntryPoint` 77 | 78 | #### Environment Test 79 | 80 | The [environment test](custom_image_cli/validation_tool/validation_tests/check_envs.py) ensures the required environment variables are set to the expected paths. 81 | 82 | Examples: 83 | * `SPARK_HOME=/usr/lib/spark` 84 | * `JAVA_HOME=/etc/alternatives/jre` 85 | 86 | #### File Structure Test 87 | 88 | The [file structure test](custom_image_cli/validation_tool/validation_tests/check_files.py) ensures the required files exist in expected locations. For different 89 | types of images, the required dependencies are different. You should make sure those files are in the correct 90 | location. 91 | 92 | #### Local Job Run Test 93 | 94 | The [local job run test](custom_image_cli/validation_tool/validation_tests/check_local_job_run.py) ensures that the custom image is valid and can pass a basic job run. We will run a sample local Spark job with the following configuration: 95 | 96 | ``` 97 | docker run -it --rm <image-uri> spark-submit 98 | --deploy-mode client 99 | --master local 100 | --class org.apache.spark.examples.SparkPi local:///usr/lib/spark/examples/jars/spark-examples.jar 101 | ``` 102 | 103 | ### Output Results 104 | Examples: 105 | ``` 106 | Amazon EMR on EKS Custom Image CLI 107 | Version: x.xx 108 | ... Checking if docker cli is installed 109 | ... 
Checking Image Manifest 110 | [INFO] Image ID: c0749c685b2a3cf50ff18c41510324585748a225bc4804a46d96a947db03a53e 111 | [INFO] Created On: 2021-05-17T20:50:07.986662904Z 112 | [INFO] Default User Set to hadoop:hadoop : PASS 113 | [INFO] Working Directory Set to /home/hadoop : PASS 114 | [INFO] Entrypoint Set to /usr/bin/entrypoint.sh : PASS 115 | [INFO] SPARK_HOME is set with value: /usr/lib/spark : PASS 116 | [INFO] JAVA_HOME is set with value: /etc/alternatives/jre : PASS 117 | [INFO] File Structure Test for spark-jars in /usr/lib/spark/jars: PASS 118 | [INFO] File Structure Test for hadoop-files in /usr/lib/hadoop: PASS 119 | [INFO] File Structure Test for hadoop-jars in /usr/lib/hadoop/lib: PASS 120 | [INFO] File Structure Test for bin-files in /usr/bin: PASS 121 | ... Start Running Sample Spark Job 122 | [INFO] Sample Spark Job Test with local:///usr/lib/spark/examples/jars/spark-examples.jar : PASS 123 | ----------------------------------------------------------------- 124 | Overall Custom Image Validation Succeeded. 125 | ----------------------------------------------------------------- 126 | ``` 127 | 128 | Error Message: 129 | 130 | ``` 131 | Amazon EMR on EKS Custom Image CLI 132 | Version: x.xx 133 | ... Checking if docker cli is installed 134 | ... 
Checking Image Manifest 135 | [INFO] Image ID: xxxx 136 | [INFO] Created On: 2021-04-20T22:12:05.523378Z 137 | [INFO] Default User Set to hadoop:hadoop : PASS 138 | [INFO] Working Directory Set to /home/hadoop : PASS 139 | [INFO] Entrypoint Set to /usr/bin/entrypoint.sh : PASS 140 | [INFO] SPARK_HOME is set with value: /usr/lib/spark : PASS 141 | [INFO] JAVA_HOME is set with value: /etc/alternatives/jre : PASS 142 | [ERROR] mockito-all MUST be in /usr/lib/hadoop/lib : FAIL 143 | [ERROR] servlet-api MUST be in /usr/lib/hadoop/lib : FAIL 144 | [ERROR] spotbugs-annotations MUST be in /usr/lib/hadoop/lib : FAIL 145 | [ERROR] stax-api MUST be in /usr/lib/hadoop/lib : FAIL 146 | [ERROR] xmlenc MUST be in /usr/lib/hadoop/lib : FAIL 147 | [INFO] File structure test for bin-files in /usr/bin: PASS 148 | ... Start Running Sample Spark Job 149 | [ERROR] Sample Spark Job Test with local:///usr/lib/spark/examples/jars/spark-examples.jar : FAIL 150 | ----------------------------------------------------------------- 151 | Custom Image Validation Failed. Please see individual test results above for detailed information. 152 | ----------------------------------------------------------------- 153 | ``` 154 | 155 | ## Unit Test 156 | 157 | To run unit tests for this tool, you can use command `python3 -m unittest discover`. 158 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon EMR on EKS Custom Image CLI 2 | ## Introduction 3 | [Amazon EMR](https://aws.amazon.com/emr/) on [Amazon EKS](https://aws.amazon.com/eks/) provides support for 4 | Custom Images, a capability that enables you to customize the Docker container images used for running 5 | Apache Spark applications on [Amazon EMR on EKS](https://aws.amazon.com/emr/features/eks/). 6 | Custom images enable you to install and configure packages specific to your workload that are not available 7 | in the public distribution of EMR’s Spark runtime into a single immutable container. An immutable container 8 | promotes portability and simplifies dependency management for each workload and enables you to integrate 9 | developing applications for EMR on EKS with your own continuous integration (CI) pipeline. 10 | 11 | To test the compatibility of the modifications made to your EMR base image, we are providing a utility to validate 12 | the image’s file structure. The utility examines basic required arguments, ensures that the modifications work as 13 | expected, and helps prevent job failures due to common misconfigurations. This tool can be integrated into your Continuous 14 | Integration (CI) pipeline when you are building your image. For more information about customizing the EMR on EKS base 15 | image, see our [documentation](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images.html). 16 | 17 | 18 | ## For Developers 19 | Developers who wish to develop on or contribute to the source code should refer to the [Contribution Guide](CONTRIBUTING.md) and the [Development Guide](DEVELOPMENT_GUIDE.md). 20 | 21 | ## Getting Started 22 | 23 | ### Prerequisite 24 | Before running this tool, please make sure you have Docker CLI installed. 25 | 26 | #### Install Docker CLI (Optional).
27 | 28 | This tool utilizes [Docker CLI](https://docs.docker.com/get-docker/) to help validate custom images. 29 | Please make sure you have Docker CLI installed prior to using the tool. 30 | 31 | ### Installation 32 | 33 | Please follow the Installation Guide [here](installer/assets/INSTALLATION_GUIDE.md). 34 | 35 | ### Usage 36 | 37 | #### Validate Custom Image 38 | 39 | Use the command: 40 | ``` 41 | emr-on-eks-custom-image validate-image -i <image_name> -r <release_name> [-t <image_type>] 42 | ``` 43 | 44 | `-i` specifies the local image URI that needs to be validated; this can be the image URI or any name/tag you defined for your image. 45 | 46 | `-r` specifies the exact release version of the EMR base image used to generate the customized image. For example, if the custom image was developed using the EMR base image with release version 5.32.0, then the parameter should specify emr-5.32.0. 47 | 48 | `-t` specifies the image type. If this is a spark image, just input spark. The default value is `spark` and the current version only supports spark runtime images. 49 | 50 | After successfully running the tool, the log info will show the test results. If the image doesn't meet the necessary configuration requirements, you will see error messages that describe what is missing. 51 | 52 | #### Basic Test 53 | 54 | The [basic test](custom_image_cli/validation_tool/validation_tests/check_manifest.py) ensures the image contains the expected configuration. The following parameters are verified in this test: 55 | 56 | * `UserName` 57 | * `WorkingDir` 58 | * `EntryPoint` 59 | 60 | #### Environment Test 61 | 62 | The [environment test](custom_image_cli/validation_tool/validation_tests/check_envs.py) ensures the required environment variables are set to the expected paths.
63 | 64 | Examples: 65 | * `SPARK_HOME=/usr/lib/spark` 66 | * `JAVA_HOME=/etc/alternatives/jre` 67 | 68 | #### File Structure Test 69 | 70 | The [file structure test](custom_image_cli/validation_tool/validation_tests/check_files.py) ensures the required files exist in the expected locations. For different 71 | types of images, the required dependencies are different. You should make sure those files are in the correct 72 | locations. 73 | 74 | #### Local Job Run Test 75 | 76 | The [local job run test](custom_image_cli/validation_tool/validation_tests/check_local_job_run.py) ensures that the custom image is valid and can pass a basic job run. We will run a sample local spark job with the following configuration: 77 | 78 | ``` 79 | docker run -it --rm <image-uri> spark-submit 80 | --deploy-mode client 81 | --master local 82 | --class org.apache.spark.examples.SparkPi local:///usr/lib/spark/examples/jars/spark-examples.jar 83 | ``` 84 | 85 | ### Output Results 86 | Examples: 87 | ``` 88 | Amazon EMR on EKS Custom Image CLI 89 | Version: x.xx 90 | ... Checking if docker cli is installed 91 | ... Checking Image Manifest 92 | [INFO] Image ID: c0749c685b2a3cf50ff18c41510324585748a225bc4804a46d96a947db03a53e 93 | [INFO] Created On: 2021-05-17T20:50:07.986662904Z 94 | [INFO] Default User Set to hadoop:hadoop : PASS 95 | [INFO] Working Directory Set to /home/hadoop : PASS 96 | [INFO] Entrypoint Set to /usr/bin/entrypoint.sh : PASS 97 | [INFO] SPARK_HOME is set with value: /usr/lib/spark : PASS 98 | [INFO] JAVA_HOME is set with value: /etc/alternatives/jre : PASS 99 | [INFO] File Structure Test for spark-jars in /usr/lib/spark/jars: PASS 100 | [INFO] File Structure Test for hadoop-files in /usr/lib/hadoop: PASS 101 | [INFO] File Structure Test for hadoop-jars in /usr/lib/hadoop/lib: PASS 102 | [INFO] File Structure Test for bin-files in /usr/bin: PASS 103 | ...
Start Running Sample Spark Job 104 | [INFO] Sample Spark Job Test with local:///usr/lib/spark/examples/jars/spark-examples.jar : PASS 105 | ----------------------------------------------------------------- 106 | Overall Custom Image Validation Succeeded. 107 | ----------------------------------------------------------------- 108 | ``` 109 | 110 | Error Message: 111 | 112 | ``` 113 | Amazon EMR on EKS Custom Image CLI 114 | Version: x.xx 115 | ... Checking if docker cli is installed 116 | ... Checking Image Manifest 117 | [INFO] Image ID: xxxx 118 | [INFO] Created On: 2021-04-20T22:12:05.523378Z 119 | [INFO] Default User Set to hadoop:hadoop : PASS 120 | [INFO] Working Directory Set to /home/hadoop : PASS 121 | [INFO] Entrypoint Set to /usr/bin/entrypoint.sh : PASS 122 | [INFO] SPARK_HOME is set with value: /usr/lib/spark : PASS 123 | [INFO] JAVA_HOME is set with value: /etc/alternatives/jre : PASS 124 | [ERROR] mockito-all MUST be in /usr/lib/hadoop/lib : FAIL 125 | [ERROR] servlet-api MUST be in /usr/lib/hadoop/lib : FAIL 126 | [ERROR] spotbugs-annotations MUST be in /usr/lib/hadoop/lib : FAIL 127 | [ERROR] stax-api MUST be in /usr/lib/hadoop/lib : FAIL 128 | [ERROR] xmlenc MUST be in /usr/lib/hadoop/lib : FAIL 129 | [INFO] File structure test for bin-files in /usr/bin: PASS 130 | ... Start Running Sample Spark Job 131 | [ERROR] Sample Spark Job Test with local:///usr/lib/spark/examples/jars/spark-examples.jar : FAIL 132 | ----------------------------------------------------------------- 133 | Custom Image Validation Failed. Please see individual test results above for detailed information. 
134 | ----------------------------------------------------------------- 135 | ``` 136 | 137 | ## Support 138 | 139 | This tool supports the following releases: 140 | Supported Versions in Repo: 141 | 142 | | Releases | Amazon EMR on EKS release versions | Container image tags | 143 | |:--------------------------:|:----------------------------------:|:--------------------:| 144 | | Amazon EMR 7.2.0 releases | emr-7.2.0-latest | emr-7.2.0:latest | 145 | | Amazon EMR 7.1.0 releases | emr-7.1.0-latest | emr-7.1.0:latest | 146 | | Amazon EMR 7.0.0 releases | emr-7.0.0-latest | emr-7.0.0:latest | 147 | | Amazon EMR 6.15.0 releases | emr-6.15.0-latest | emr-6.15.0:latest | 148 | | Amazon EMR 6.14.0 releases | emr-6.14.0-latest | emr-6.14.0:latest | 149 | | Amazon EMR 6.13.0 releases | emr-6.13.0-latest | emr-6.13.0:latest | 150 | | Amazon EMR 6.12.0 releases | emr-6.12.0-latest | emr-6.12.0:latest | 151 | | Amazon EMR 6.11.0 releases | emr-6.11.0-latest | emr-6.11.0:latest | 152 | | Amazon EMR 6.10.0 releases | emr-6.10.0-latest | emr-6.10.0:latest | 153 | | Amazon EMR 6.9.0 releases | emr-6.9.0-latest | emr-6.9.0:latest | 154 | | Amazon EMR 6.8.0 releases | emr-6.8.0-latest | emr-6.8.0:latest | 155 | | Amazon EMR 6.7.0 releases | emr-6.7.0-latest | emr-6.7.0:latest | 156 | | Amazon EMR 6.6.0 releases | emr-6.6.0-latest | emr-6.6.0:latest | 157 | | | emr-6.6.0-20220411 | emr-6.6.0:20220411 | 158 | | Amazon EMR 6.5.0 releases | emr-6.5.0-latest | emr-6.5.0:latest | 159 | | | emr-6.5.0-20211119 | emr-6.5.0:20211119 | 160 | | Amazon EMR 6.4.0 releases | emr-6.4.0-latest | emr-6.4.0:latest | 161 | | | emr-6.4.0-20210830 | emr-6.4.0:20210830 | 162 | | Amazon EMR 6.3.0 releases | emr-6.3.0-latest | emr-6.3.0:latest | 163 | | | emr-6.3.0-20210429 | emr-6.3.0:20210429 | 164 | | Amazon EMR 6.2.0 releases | emr-6.2.0-latest | emr-6.2.0-20210129 | 165 | | | emr-6.2.0-20210129 | emr-6.2.0-20210129 | 166 | | | emr-6.2.0-20201218 | emr-6.2.0-20201218 | 167 | | | emr-6.2.0-20201201 | 
emr-6.2.0-20201201 | 168 | | Amazon EMR 5.35.0 releases | emr-5.35.0-latest | emr-5.35.0:latest | 169 | | | emr-5.35.0-20220307 | emr-5.35.0:20220307 | 170 | | Amazon EMR 5.34.0 releases | emr-5.34.0-latest | emr-5.34.0:latest | 171 | | | emr-5.34.0-20211208 | emr-5.34.0:20211208 | 172 | | Amazon EMR 5.33.0 releases | emr-5.33.0-latest | emr-5.33.0-20210323 | 173 | | | emr-5.33.0-20210323 | emr-5.33.0-20210323 | 174 | | Amazon EMR 5.32.0 releases | emr-5.32.0-latest | emr-5.32.0-20210129 | 175 | | | emr-5.32.0-20210129 | emr-5.32.0-20210129 | 176 | | | emr-5.32.0-20201218 | emr-5.32.0-20201218 | 177 | | | emr-5.32.0-20201201 | emr-5.32.0-20201201 | 178 | 179 | Supported Versions in [Releases](https://github.com/awslabs/amazon-emr-on-eks-custom-image-cli/releases) for Mac/Linux/Windows: 180 | 181 | | Releases | Amazon EMR on EKS release versions | Container image tags | 182 | |:--------------------------:|:----------------------------------:|:--------------------:| 183 | | Amazon EMR 7.2.0 releases | emr-7.2.0-latest | emr-7.2.0:latest | 184 | | Amazon EMR 7.1.0 releases | emr-7.1.0-latest | emr-7.1.0:latest | 185 | | Amazon EMR 7.0.0 releases | emr-7.0.0-latest | emr-7.0.0:latest | 186 | | Amazon EMR 6.15.0 releases | emr-6.15.0-latest | emr-6.15.0:latest | 187 | | Amazon EMR 6.14.0 releases | emr-6.14.0-latest | emr-6.14.0:latest | 188 | | Amazon EMR 6.13.0 releases | emr-6.13.0-latest | emr-6.13.0:latest | 189 | | Amazon EMR 6.12.0 releases | emr-6.12.0-latest | emr-6.12.0:latest | 190 | | Amazon EMR 6.11.0 releases | emr-6.11.0-latest | emr-6.11.0:latest | 191 | | Amazon EMR 6.10.0 releases | emr-6.10.0-latest | emr-6.10.0:latest | 192 | | Amazon EMR 6.9.0 releases | emr-6.9.0-latest | emr-6.9.0:latest | 193 | | Amazon EMR 6.8.0 releases | emr-6.8.0-latest | emr-6.8.0:latest | 194 | | Amazon EMR 6.7.0 releases | emr-6.7.0-latest | emr-6.7.0:latest | 195 | | Amazon EMR 6.6.0 releases | emr-6.6.0-latest | emr-6.6.0:latest | 196 | | | emr-6.6.0-20220411 | 
emr-6.6.0:20220411 | 197 | | Amazon EMR 6.5.0 releases | emr-6.5.0-latest | emr-6.5.0:latest | 198 | | | emr-6.5.0-20211119 | emr-6.5.0:20211119 | 199 | | Amazon EMR 6.4.0 releases | emr-6.4.0-latest | emr-6.4.0:latest | 200 | | | emr-6.4.0-20210830 | emr-6.4.0:20210830 | 201 | | Amazon EMR 6.3.0 releases | emr-6.3.0-latest | emr-6.3.0:latest | 202 | | | emr-6.3.0-20210429 | emr-6.3.0:20210429 | 203 | | Amazon EMR 6.2.0 releases | emr-6.2.0-latest | emr-6.2.0-20210129 | 204 | | | emr-6.2.0-20210129 | emr-6.2.0-20210129 | 205 | | | emr-6.2.0-20201218 | emr-6.2.0-20201218 | 206 | | | emr-6.2.0-20201201 | emr-6.2.0-20201201 | 207 | | Amazon EMR 5.35.0 releases | emr-5.35.0-latest | emr-5.35.0:latest | 208 | | | emr-5.35.0-20220307 | emr-5.35.0:20220307 | 209 | | Amazon EMR 5.34.0 releases | emr-5.34.0-latest | emr-5.34.0:latest | 210 | | | emr-5.34.0-20211208 | emr-5.34.0:20211208 | 211 | | Amazon EMR 5.33.0 releases | emr-5.33.0-latest | emr-5.33.0-20210323 | 212 | | | emr-5.33.0-20210323 | emr-5.33.0-20210323 | 213 | | Amazon EMR 5.32.0 releases | emr-5.32.0-latest | emr-5.32.0-20210129 | 214 | | | emr-5.32.0-20210129 | emr-5.32.0-20210129 | 215 | | | emr-5.32.0-20201218 | emr-5.32.0-20201218 | 216 | | | emr-5.32.0-20201201 | emr-5.32.0-20201201 | 217 | 218 | You can find more release information [Here](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images-tag.html). 219 | 220 | ## Security 221 | 222 | If you discover a potential security issue in this project, or think you may have discovered a security issue, we request you to notify AWS Security via our vulnerability [reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do not create a public GitHub issue. 223 | 224 | --------------------------------------------------------------------------------