├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── Jenkinsfile ├── LICENSE ├── Makefile ├── README.md ├── app ├── __init__.py ├── __main__.py ├── client │ ├── __init__.py │ ├── build_runner.py │ ├── cluster_api_client.py │ └── service_runner.py ├── common │ ├── __init__.py │ ├── build_artifact.py │ ├── cluster_service.py │ ├── console_output.py │ ├── console_output_segment.py │ └── metrics.py ├── deployment │ ├── __init__.py │ ├── deploy_target.py │ ├── remote_master_service.py │ ├── remote_service.py │ └── remote_slave_service.py ├── master │ ├── __init__.py │ ├── atom.py │ ├── atom_grouper.py │ ├── atomizer.py │ ├── build.py │ ├── build_fsm.py │ ├── build_request.py │ ├── build_request_handler.py │ ├── build_scheduler.py │ ├── build_scheduler_pool.py │ ├── build_store.py │ ├── cluster_master.py │ ├── cluster_runner_config.py │ ├── job_config.py │ ├── slave.py │ ├── slave_allocator.py │ ├── subjob.py │ ├── subjob_calculator.py │ └── time_based_atom_grouper.py ├── project_type │ ├── __init__.py │ ├── directory.py │ ├── git.py │ └── project_type.py ├── slave │ ├── __init__.py │ ├── cluster_slave.py │ └── subjob_executor.py ├── subcommands │ ├── __init__.py │ ├── build_subcommand.py │ ├── deploy_subcommand.py │ ├── master_subcommand.py │ ├── service_subcommand.py │ ├── shutdown_subcommand.py │ ├── slave_subcommand.py │ ├── stop_subcommand.py │ └── subcommand.py ├── util │ ├── __init__.py │ ├── analytics.py │ ├── app_info.py │ ├── argument_parsing.py │ ├── autoversioning.py │ ├── conf │ │ ├── __init__.py │ │ ├── base_config_loader.py │ │ ├── config_file.py │ │ ├── configuration.py │ │ ├── deploy_config_loader.py │ │ ├── master_config_loader.py │ │ ├── slave_config_loader.py │ │ └── stop_config_loader.py │ ├── counter.py │ ├── decorators.py │ ├── event_log.py │ ├── exceptions.py │ ├── fs.py │ ├── log.py │ ├── network.py │ ├── ordered_set_queue.py │ ├── pagination.py │ ├── poll.py │ ├── process_utils.py │ ├── safe_thread.py │ ├── secret.py │ ├── session_id.py │ ├── shell │ │ ├── __init__.py │ │ ├── local_shell_client.py │ │ ├── remote_shell_client.py │ │ ├── shell_client.py │ │ └── shell_client_factory.py │ ├── single_use_coin.py │ ├── singleton.py │ ├── unhandled_exception_handler.py │ ├── url_builder.py │ └── util.py └── web_framework │ ├── __init__.py │ ├── api_version_handler.py │ ├── cluster_application.py │ ├── cluster_base_handler.py │ ├── cluster_master_application.py │ ├── cluster_slave_application.py │ └── route_node.py ├── appveyor.yml ├── bin ├── git_askpass.sh └── git_ssh.sh ├── clusterrunner.yaml ├── conf └── default_clusterrunner.conf ├── dev-requirements.in ├── dev-requirements.txt ├── examples └── directory job │ └── clusterrunner.yaml ├── pylintrc ├── requirements.in ├── requirements.txt ├── setup.py ├── test ├── README.md ├── __init__.py ├── framework │ ├── __init__.py │ ├── base_integration_test_case.py │ ├── base_unit_test_case.py │ ├── comparators.py │ ├── functional │ │ ├── __init__.py │ │ ├── base_functional_test_case.py │ │ ├── fs_item.py │ │ └── functional_test_cluster.py │ └── pylint │ │ ├── __init__.py │ │ └── clusterrunner_token_checker.py ├── functional │ ├── __init__.py │ ├── heartbeat │ │ ├── __init__.py │ │ └── test_heartbeat.py │ ├── job_configs.py │ ├── master │ │ ├── __init__.py │ │ ├── test_api_version_requests.py │ │ ├── test_build_cancellation.py │ │ ├── test_console_output.py │ │ ├── test_deallocation_and_allocation_of_slaves_mid_build.py │ │ ├── test_endpoints.py │ │ ├── test_http_timeout.py │ │ └── test_shutdown.py │ └── 
test_cluster_basic.py ├── integration │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── test_build_artifact.py │ │ └── test_console_output.py │ └── master │ │ └── __init__.py └── unit │ ├── __init__.py │ ├── client │ ├── __init__.py │ ├── test_build_runner.py │ ├── test_config_validator.py │ └── test_service_runner.py │ ├── common │ ├── __init__.py │ ├── test_build_artifact.py │ └── test_cluster_service.py │ ├── deployment │ ├── __init__.py │ ├── test_remote_master_service.py │ └── test_remote_slave_service.py │ ├── master │ ├── __init__.py │ ├── test_atomizer.py │ ├── test_build.py │ ├── test_build_request_handler.py │ ├── test_build_scheduler.py │ ├── test_cluster_master.py │ ├── test_cluster_runner_config.py │ ├── test_job_config.py │ ├── test_slave.py │ ├── test_slave_allocator.py │ ├── test_subjob.py │ ├── test_subjob_calculator.py │ └── test_time_based_atom_grouper.py │ ├── project_type │ ├── __init__.py │ ├── test_directory.py │ ├── test_git.py │ └── test_project_type.py │ ├── slave │ ├── __init__.py │ ├── test_cluster_slave.py │ └── test_subjob_executor.py │ ├── subcommands │ ├── __init__.py │ ├── test_build_subcommand.py │ ├── test_deploy_subcommand.py │ └── test_stop_subcommand.py │ ├── test_main.py │ ├── test_test.py │ ├── util │ ├── __init__.py │ ├── conf │ │ ├── __init__.py │ │ ├── test_base_config_loader.py │ │ ├── test_configuration.py │ │ ├── test_master_config_loader.py │ │ └── test_slave_config_loader.py │ ├── shell │ │ ├── __init__.py │ │ ├── test_factory.py │ │ ├── test_local_shell_client.py │ │ ├── test_remote_shell_client.py │ │ └── test_shell_client.py │ ├── test_autoversioning.py │ ├── test_decorators.py │ ├── test_event_log.py │ ├── test_fs.py │ ├── test_network.py │ ├── test_process_utils.py │ ├── test_safe_thread.py │ ├── test_secret.py │ ├── test_session_id.py │ ├── test_single_use_coin.py │ ├── test_singleton.py │ ├── test_unhandled_exception_handler.py │ └── test_url_builder.py │ └── web_framework │ ├── __init__.py │ ├── test_cluster_base_handler.py │ └── test_route_node.py └── windows ├── deploy ├── Makefile ├── README.md ├── deploy_clusterrunner.yml ├── files │ ├── clusterrunner.conf │ └── run_process_nohup.ps1 ├── group_vars │ └── clusterrunner_nodes.yml └── hosts ├── install.ps1 └── run_cr_unit_on_cr.cmd /.dockerignore: -------------------------------------------------------------------------------- 1 | # It is normal to exclude the .git/ repo, however ClusterRunner uses it for version calculation 2 | # so it must be included with the Docker image. 
3 | # .git/ 4 | 5 | # virtualenv 6 | .python-version 7 | 8 | # test/build 9 | **/__pycache__/ 10 | *.egg-info/ 11 | .hypothesis/ 12 | build_results/ 13 | dist/ 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | dist 12 | build 13 | eggs 14 | parts 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | coverage.xml 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Mr Developer 35 | .mr.developer.cfg 36 | .project 37 | .pydevproject 38 | 39 | # IntelliJ personal files 40 | .idea/* 41 | .idea/ClusterRunner.iml 42 | 43 | # ClusterRunner build and runtime files 44 | build_results/ 45 | clusterrunner.zip 46 | repos/ 47 | results/ 48 | slave_results/ 49 | .hypothesis/ 50 | 51 | # virtualenv 52 | .python-version 53 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "3.4" 5 | 6 | # each line defined in env will create a new parallel test job on Travis 7 | env: 8 | - TARGETS="lint test-unit-with-coverage" 9 | - TARGETS="test-integration-with-coverage" 10 | - TARGETS="test-functional test-unit-via-clusterrunner" 11 | 12 | install: 13 | - make init-dev 14 | 15 | script: 16 | - export CR_VERBOSE=1 # turns on stdout logging for master and slave services during functional tests 17 | - make $TARGETS 18 | 19 | after_success: 20 | - pip install python-coveralls==2.5.0 # Do not put in requirements.txt; it will not install on Windows. 21 | - coveralls 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Docker workflow for building and packaging ClusterRunner to an RPM. 3 | # 4 | 5 | # STAGE 1: Official PEP 513 Python Manylinux (RHEL5) base with Python 3.4 enabled to create 6 | # linux_x86_64 pex. 7 | FROM quay.io/pypa/manylinux1_x86_64:2020-01-31-d8fa357 AS builder 8 | ENV PATH="/opt/python/cp34-cp34m/bin:${PATH}" 9 | 10 | WORKDIR /ClusterRunner 11 | 12 | COPY Makefile *requirements.txt ./ 13 | RUN make init-dev wheels 14 | 15 | COPY . . 16 | RUN make dist/clusterrunner 17 | 18 | # STAGE 2: CentOS 7 base w/ fpm to package pex into an rpm. 19 | FROM cdrx/fpm-centos:7 AS packager 20 | WORKDIR /root 21 | COPY . . 
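# Pull in the pex binary and package metadata produced by the builder stage, then build the RPM.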
22 | COPY --from=builder /ClusterRunner/dist/clusterrunner ./dist/
23 | COPY --from=builder /ClusterRunner/clusterrunner.egg-info/PKG-INFO ./clusterrunner.egg-info/
24 | RUN make rpm
25 | 
--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
1 | library("jenkins-pipeline-library")
2 | 
3 | pipeline {
4 |     agent { label 'docker' }
5 | 
6 |     stages {
7 |         stage('Build') {
8 | 
9 |             steps {
10 |                 sh 'make clean docker-rpm'
11 |             }
12 |         }
13 |         stage('Release') {
14 |             steps {
15 |                 script {
16 |                     def rpmFiles = findFiles(glob: 'dist/*.rpm').each { f -> f.path }
17 |                     publishRPM(fileList: rpmFiles, repoName: 'productivity', repoPath: 'com/box/clusterrunner', noRpmNamePath: true)
18 |                 }
19 |             }
20 |         }
21 |     }
22 | }
23 | 
--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/__init__.py
--------------------------------------------------------------------------------
/app/client/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/client/__init__.py
--------------------------------------------------------------------------------
/app/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/common/__init__.py
--------------------------------------------------------------------------------
/app/common/cluster_service.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | from app.common.build_artifact import BuildArtifact
4 | from app.util.exceptions import BadRequestError, ItemNotFoundError
5 | 
6 | 
7 | class ClusterService:
8 |     """
9 |     The abstract base ClusterRunner service class.
10 |     """
11 |     def get_console_output(
12 |             self,
13 |             build_id: int,
14 |             subjob_id: int,
15 |             atom_id: int,
16 |             result_root: str,
17 |             max_lines: int=50,
18 |             offset_line: Optional[int]=None,
19 |     ):
20 |         """
21 |         Return the console output if it exists; raise an ItemNotFoundError if not.
22 | 
23 |         On success, the response contains keys: offset_line, num_lines, total_num_lines, and content.
24 | 
25 |         e.g.:
26 |         {
27 |             'offset_line': 0,
28 |             'num_lines': 50,
29 |             'total_num_lines': 167,
30 |             'content': 'Lorem ipsum dolor sit amet,\nconsectetur adipiscing elit,\n...',
31 |         }
32 | 
33 |         :param build_id: build id
34 |         :param subjob_id: subjob id
35 |         :param atom_id: atom id
36 |         :param result_root: the sys path to either the results or artifacts directory where results are stored.
37 |         :param max_lines: The maximum total number of lines to return. If max_lines + offset_line lines do not
38 |             exist in the output file, just return what is there.
39 |         :param offset_line: The line number (0-indexed) to start reading content from. If none is specified, we will
40 |             return the console output starting from the end of the file.
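            (Illustrative example: with max_lines=50 and offset_line=None on a 167-line output file, the last
            50 lines are returned, i.e. the response's offset_line would be 117.)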
41 | """ 42 | if offset_line is not None and offset_line < 0: 43 | raise BadRequestError('\'offset_line\' must be greater than or equal to zero.') 44 | if max_lines <= 0: 45 | raise BadRequestError('\'max_lines\' must be greater than zero.') 46 | 47 | segment = BuildArtifact.get_console_output( 48 | build_id, subjob_id, atom_id, result_root, max_lines, offset_line) 49 | 50 | if not segment: 51 | raise ItemNotFoundError('Console output does not exist on this host for ' 52 | 'build {}, subjob {}, atom {}.'.format(build_id, subjob_id, atom_id)) 53 | return { 54 | 'offset_line': segment.offset_line, 55 | 'num_lines': segment.num_lines, 56 | 'total_num_lines': segment.total_num_lines, 57 | 'content': segment.content, 58 | } 59 | -------------------------------------------------------------------------------- /app/common/console_output_segment.py: -------------------------------------------------------------------------------- 1 | class ConsoleOutputSegment(object): 2 | """ 3 | Represents a subset of lines from the console output of an atom. 4 | """ 5 | 6 | def __init__(self, offset_line, num_lines, total_num_lines, content): 7 | """ 8 | :param offset_line: The starting line number of this console output segment. 9 | :type offset_line: int 10 | :param num_lines: The number of lines returned by this segment. 11 | :type num_lines: int 12 | :param total_num_lines: The total number of lines that are in the console output (not just this segment). 13 | :type total_num_lines: int 14 | :param content: The actual string content of this segment of console output. 15 | :type content: str 16 | """ 17 | self.offset_line = offset_line 18 | self.num_lines = num_lines 19 | self.total_num_lines = total_num_lines 20 | self.content = content 21 | -------------------------------------------------------------------------------- /app/common/metrics.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from typing import Callable, Iterator, List 4 | 5 | from prometheus_client import Counter, Histogram, REGISTRY 6 | from prometheus_client.core import GaugeMetricFamily 7 | 8 | 9 | http_request_duration_seconds = Histogram( # pylint: disable=no-value-for-parameter 10 | 'http_request_duration_seconds', 11 | 'Latency of HTTP requests in seconds', 12 | ['method', 'endpoint', 'status'], 13 | buckets=(.005, .01, .05, .1, .25, .5, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 50.0, float('inf'))) 14 | 15 | build_state_duration_seconds = Histogram( # pylint: disable=no-value-for-parameter 16 | 'build_state_duration_seconds', 17 | 'Total amount of time a build spends in each build state', 18 | ['state']) 19 | 20 | serialized_build_time_seconds = Histogram( # pylint: disable=no-value-for-parameter 21 | 'serialized_build_time_seconds', 22 | 'Total amount of time that would have been consumed by builds if all work was done serially') 23 | 24 | internal_errors = Counter( 25 | 'internal_errors', 26 | 'Total number of internal errors', 27 | ['type']) 28 | 29 | 30 | class ErrorType(str, Enum): 31 | AtomizerFailure = 'AtomizerFailure' 32 | NetworkRequestFailure = 'NetworkRequestFailure' 33 | PostBuildFailure = 'PostBuildFailure' 34 | SubjobStartFailure = 'SubjobStartFailure' 35 | SubjobWriteFailure = 'SubjobWriteFailure' 36 | ZipFileCreationFailure = 'ZipFileCreationFailure' 37 | 38 | def __str__(self): 39 | """ 40 | Even though this class inherits from str, still include a __str__ method so that 41 | metrics in the /metrics endpoint appear as 42 | 
internal_errors{type="PostBuildFailure"} 1.0
43 |         instead of
44 |             internal_errors{type="ErrorType.PostBuildFailure"} 1.0
45 |         """
46 |         return self.value
47 | 
48 | 
49 | class SlavesCollector:
50 |     """
51 |     Prometheus collector to collect the total number of alive/dead/idle slaves connected to the master.
52 |     collect() is called once each time prometheus scrapes the /metrics endpoint. This class ensures that
53 |     1. The list of slaves only gets iterated through once per scrape
54 |     2. A single slave is not double counted in two states
55 |     """
56 | 
57 |     _slaves_collector_is_registered = False
58 | 
59 |     def __init__(self, get_slaves: Callable[[], List['app.master.slave.Slave']]):
60 |         self._get_slaves = get_slaves
61 | 
62 |     def collect(self) -> Iterator[GaugeMetricFamily]:
63 |         active, idle, dead = 0, 0, 0
64 |         for slave in self._get_slaves():
65 |             if slave.is_alive(use_cached=True) and slave.current_build_id is not None:
66 |                 active += 1
67 |             elif slave.is_alive(use_cached=True) and slave.current_build_id is None:
68 |                 idle += 1
69 |             elif not slave.is_alive(use_cached=True) and not slave.is_shutdown():
70 |                 # Slave is not alive and was not deliberately put in shutdown mode. Count it as dead.
71 |                 dead += 1
72 |             else:
73 |                 # If the slave is not alive but is_shutdown() is True, then we have deliberately
74 |                 # and gracefully killed the slave. We do not want to categorize such a slave as 'dead'.
75 |                 pass
76 | 
77 |         slaves_gauge = GaugeMetricFamily('slaves', 'Total number of slaves', labels=['state'])
78 |         slaves_gauge.add_metric(['active'], active)
79 |         slaves_gauge.add_metric(['idle'], idle)
80 |         slaves_gauge.add_metric(['dead'], dead)
81 |         yield slaves_gauge
82 | 
83 |     @classmethod
84 |     def register_slaves_metrics_collector(cls, get_slaves: Callable[[], List['app.master.slave.Slave']]):
85 |         if not cls._slaves_collector_is_registered:
86 |             REGISTRY.register(SlavesCollector(get_slaves))
87 |             cls._slaves_collector_is_registered = True
88 | 
--------------------------------------------------------------------------------
/app/deployment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/deployment/__init__.py
--------------------------------------------------------------------------------
/app/deployment/deploy_target.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | from app.util.shell.shell_client_factory import ShellClientFactory
4 | 
5 | 
6 | class DeployTarget(object):
7 |     """
8 |     A "deploy target" is the host to which clusterrunner will be deployed. Deployment entails putting
9 |     in place the clusterrunner binaries and configuration only. This class is not responsible for manipulating
10 |     processes and stopping/starting services.
11 |     """
12 | 
13 |     def __init__(self, host, username):
14 |         """
15 |         :param host: the fully qualified hostname of the host to deploy to
16 |         :type host: str
17 |         :param username: the user who is executing this process and whose ssh credentials will be used
18 |         :type username: str
19 |         """
20 |         self._host = host
21 |         self._username = username
22 |         self._shell_client = ShellClientFactory.create(host, username)
23 | 
24 |     def deploy_binary(self, source_tar, dest_dir):
25 |         """
26 |         Given the tarred/zipped binary directory on the current host, move it to the target host and unzip
27 |         it into the dest_dir on the remote host.
This method will create the directory if it doesn't exist, 28 | and will overwrite the directory if it already exists. 29 | 30 | :param source_tar: the path the tar-zipped clusterrunner binary is on the current host 31 | :type source_tar: str 32 | :param dest_dir: the path to place the clusterrunner binaries on the deploy target host 33 | :type dest_dir: str 34 | """ 35 | parent_dest_dir = os.path.dirname(dest_dir) 36 | self._shell_client.exec_command('rm -rf {0}; mkdir -p {0}'.format(dest_dir), error_on_failure=True) 37 | self._shell_client.copy(source_tar, '{}/clusterrunner.tgz'.format(parent_dest_dir), error_on_failure=True) 38 | self._shell_client.exec_command( 39 | command='tar zxvf {}/clusterrunner.tgz -C {}'.format(parent_dest_dir, dest_dir), 40 | error_on_failure=True 41 | ) 42 | 43 | def deploy_conf(self, source_path, dest_path): 44 | """ 45 | Given a conf file on the local host, send it to the remote deploy target host, and set the 46 | proper permissions. 47 | 48 | :param source_path: the path to the clusterrunner conf file on the current host 49 | :type source_path: str 50 | :param dest_path: the path to place the clusterrunner conf file on the deploy target host 51 | :type dest_path: str 52 | """ 53 | if not os.path.exists(source_path): 54 | raise RuntimeError('Expected configuration file to exist in {}, but does not.'.format(source_path)) 55 | 56 | self._shell_client.copy(source_path, dest_path) 57 | # Must set permissions of conf to '600' for security purposes. 58 | self._shell_client.exec_command('chmod 600 {}'.format(dest_path), error_on_failure=True) 59 | -------------------------------------------------------------------------------- /app/deployment/remote_master_service.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from app.client.service_runner import ServiceRunner 4 | from app.deployment.remote_service import RemoteService 5 | 6 | 7 | class RemoteMasterService(RemoteService): 8 | """ 9 | This class serves to start the master service remotely. 10 | """ 11 | # Number of seconds to wait for master service to respond after starting 12 | _MASTER_SERVICE_TIMEOUT_SEC = 45 13 | # The number of times to retry starting the master daemon 14 | _MASTER_SERVICE_START_RETRIES = 3 15 | 16 | def start_and_block_until_up(self, port, timeout_sec=_MASTER_SERVICE_TIMEOUT_SEC): 17 | """ 18 | Start the clusterrunner master service and block until the master responds to web requests. Times out 19 | and throws an exception after timeout_sec. 20 | 21 | :param port: the port that the master service will run on 22 | :type port: int 23 | :param timeout_sec: number of seconds to wait for the master to respond before timing out 24 | :type timeout_sec: int 25 | """ 26 | # Start the master service daemon 27 | master_service_cmd = 'nohup {} master --port {} &'.format(self._executable_path, str(port)) 28 | 29 | # There are cases when 'clusterrunner deploy' fails, and there is no clusterrunner master service process 30 | # to be seen--but the fix is to just re-run the command. 
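        # The loop below retries the daemon launch, sleeping briefly after each attempt and breaking out
        # as soon as the master process is seen running on the remote host.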
31 | for i in range(self._MASTER_SERVICE_START_RETRIES): 32 | self._shell_client.exec_command(master_service_cmd, async=True) 33 | # Give the service a second to start up 34 | time.sleep(1) 35 | 36 | if self._is_process_running(self._executable_path): 37 | break 38 | else: 39 | self._logger.warning('Master service process failed to start on try {}, host {}'.format(i, self.host)) 40 | 41 | if not self._is_process_running(self._executable_path): 42 | self._logger.error('Master service process failed to start on host {}.'.format(self.host)) 43 | raise SystemExit(1) 44 | 45 | # Check to see if the master service is responding to http requests 46 | master_service_url = '{}:{}'.format(self.host, str(port)) 47 | master_service = ServiceRunner(master_service_url, main_executable=self._executable_path) 48 | 49 | if not master_service.is_up(master_service_url, timeout=timeout_sec): 50 | self._logger.error('Master service process exists on {}, but service on {} failed to respond.'.format( 51 | self.host, master_service_url)) 52 | raise SystemExit(1) 53 | 54 | def _is_process_running(self, command): 55 | """ 56 | Is a process that contains the string command running on the remote host? 57 | 58 | :param command: The command substring to search for. 59 | :type command: str 60 | :rtype: bool 61 | """ 62 | # Replace first char of command, 'n', with '[n]' to prevent the grep call from showing up in search results 63 | command = '[{}]'.format(command[0]) + command[1:] 64 | 65 | # Because this shell_client call can potentially be remote, we cannot use the psutil library, and 66 | # must instead perform shell commands directly. 67 | ps_search_cmd = 'ps ax | grep \'{}\''.format(command) 68 | ps_search_response = self._shell_client.exec_command(ps_search_cmd, async=False) 69 | output = ps_search_response.raw_output.decode("utf-8").split("\n") 70 | 71 | for output_line in output: 72 | if len(output_line.strip()) == 0: 73 | continue 74 | return True 75 | 76 | return False 77 | -------------------------------------------------------------------------------- /app/deployment/remote_service.py: -------------------------------------------------------------------------------- 1 | from app.util.log import get_logger 2 | from app.util.shell.shell_client_factory import ShellClientFactory 3 | 4 | 5 | class RemoteService(object): 6 | """ 7 | Parent class for manipulating clusterrunner services on remote hosts (through ssh). 8 | """ 9 | 10 | def __init__(self, host, username, executable_path): 11 | """ 12 | :param host: the fully qualified hostname of the host to deploy to 13 | :type host: str 14 | :param username: the user who is executing this process and whose ssh credentials will be used 15 | :type username: str 16 | :param executable_path: the path to the clusterrunner executable on the remote host 17 | :type executable_path: str 18 | """ 19 | self._logger = get_logger(__name__) 20 | self.host = host 21 | self._username = username 22 | self._executable_path = executable_path 23 | self._shell_client = ShellClientFactory.create(host, username) 24 | 25 | def stop(self): 26 | """ 27 | Stop all clusterrunner services on this machine. This functionality is in the base class because it 28 | should be common across all possible subclasses. 
29 | """ 30 | response = self._shell_client.exec_command('{} stop'.format(self._executable_path), async=False) 31 | 32 | if not response.is_success(): 33 | self._logger.error('clusterrunner stop failed on host {} with output: {}, error: {}'.format( 34 | self.host, response.raw_output, response.raw_error)) 35 | -------------------------------------------------------------------------------- /app/deployment/remote_slave_service.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | from app.deployment.remote_service import RemoteService 4 | 5 | 6 | class RemoteSlaveService(RemoteService): 7 | """ 8 | This class serves to start the slave service remotely. 9 | """ 10 | 11 | def start(self, master_host, master_port, slave_port, num_executors): 12 | """ 13 | Start the clusterrunner master service and block until the master responds to web requests. Times out 14 | and throws an exception after timeout_sec. 15 | 16 | :param master_host: the host that the master service is running on 17 | :type master_host: str 18 | :param master_port: the port that the master service is running on 19 | :type master_port: int 20 | :param slave_port: the port that this slave service will run on 21 | :type slave_port: int 22 | :param num_executors: the number of concurrent executors that will run in this slave service 23 | :type num_executors: int 24 | """ 25 | if master_host == 'localhost': 26 | master_host = socket.gethostname() 27 | slave_args = '--master-url {}:{}'.format(master_host, str(master_port)) 28 | slave_args += ' --port {}'.format(str(slave_port)) 29 | slave_args += ' --num-executors {}'.format(str(num_executors)) 30 | self._shell_client.exec_command('nohup {} slave {} &'.format(self._executable_path, slave_args), async=True) 31 | -------------------------------------------------------------------------------- /app/master/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/master/__init__.py -------------------------------------------------------------------------------- /app/master/atom.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AtomState(str, Enum): 5 | NOT_STARTED = 'NOT_STARTED' 6 | IN_PROGRESS = 'IN_PROGRESS' 7 | COMPLETED = 'COMPLETED' 8 | 9 | 10 | class Atom(object): 11 | def __init__( 12 | self, 13 | command_string, 14 | expected_time=None, 15 | actual_time=None, 16 | exit_code=None, 17 | state=None, 18 | atom_id=None, 19 | subjob_id=None 20 | ): 21 | """ 22 | :type command_string: str 23 | :type expected_time: float | None 24 | :type actual_time: float | None 25 | :type exit_code: int | None 26 | :type state: `:class:AtomState` | None 27 | :type atom_id: int | None 28 | :type subjob_id: int | None 29 | """ 30 | self.command_string = command_string 31 | self.expected_time = expected_time 32 | self.actual_time = actual_time 33 | self.exit_code = exit_code 34 | self.state = state 35 | self.subjob_id = subjob_id 36 | self.id = atom_id 37 | 38 | def api_representation(self): 39 | return { 40 | 'command_string': self.command_string, 41 | 'expected_time': self.expected_time, 42 | 'actual_time': self.actual_time, 43 | 'exit_code': self.exit_code, 44 | 'state': self.state, 45 | 'id': self.id, 46 | 'subjob_id': self.subjob_id, 47 | } 48 | -------------------------------------------------------------------------------- 
/app/master/atom_grouper.py: -------------------------------------------------------------------------------- 1 | class AtomGrouper(object): 2 | def __init__(self, atoms, max_processes): 3 | """ 4 | :param atoms: the list of atoms 5 | :type atoms: list[app.master.atom.Atom] 6 | :param max_processes: the maximum number of processes requested for this job 7 | :type max_processes: int 8 | """ 9 | self._atoms = atoms 10 | self._max_processes = max_processes 11 | 12 | def groupings(self): 13 | """ 14 | Groups together atoms based on whatever strategy we choose. 15 | 16 | For now we are going with the default implementation, which is one atom per grouping. 17 | 18 | :return: a list of lists of atoms 19 | :rtype: list[list[app.master.atom.Atom]] 20 | """ 21 | return [[atom] for atom in self._atoms] 22 | -------------------------------------------------------------------------------- /app/master/atomizer.py: -------------------------------------------------------------------------------- 1 | from app.common.metrics import ErrorType, internal_errors 2 | from app.master.atom import Atom 3 | from app.util import log 4 | from app.util.process_utils import get_environment_variable_setter_command 5 | 6 | 7 | class Atomizer(object): 8 | """ 9 | An Atomizer takes care of translating the commands as parsed from the "atomizers" section of the project config 10 | into a list of atoms. The actual computed atoms are just environment variable export shell commands that are then 11 | prepended to whatever commands were specified in the "commands" section of the project config. 12 | """ 13 | def __init__(self, atomizer_dicts): 14 | """ 15 | :param atomizer_dicts: A list of dicts mapping atomizer env var names to atomizer commands 16 | :type atomizer_dicts: list[dict[str, str]] 17 | """ 18 | self._logger = log.get_logger(__name__) 19 | self._atomizer_dicts = atomizer_dicts 20 | 21 | def atomize_in_project(self, project_type): 22 | """ 23 | Translate the atomizer dicts that this instance was initialized with into a list of actual atom commands. This 24 | executes atomizer commands inside the given project in order to generate the atoms. 25 | 26 | :param project_type: The ProjectType instance in which to execute the atomizer commands 27 | :type project_type: ProjectType 28 | :return: The list of environment variable "export" atom commands 29 | :rtype: list[app.master.atom.Atom] 30 | """ 31 | atoms_list = [] 32 | for atomizer_dict in self._atomizer_dicts: 33 | for atomizer_var_name, atomizer_command in atomizer_dict.items(): 34 | atomizer_output, exit_code = project_type.execute_command_in_project(atomizer_command) 35 | if exit_code != 0: 36 | self._logger.error('Atomizer command "{}" for variable "{}" failed with exit code: {} and output:' 37 | '\n{}', atomizer_command, atomizer_var_name, exit_code, atomizer_output) 38 | internal_errors.labels(ErrorType.AtomizerFailure).inc() # pylint: disable=no-member 39 | raise AtomizerError('Atomizer command failed!') 40 | 41 | new_atoms = [] 42 | for atom_value in atomizer_output.strip().splitlines(): 43 | # For purposes of matching atom string values across builds, we must replace the generated/unique 44 | # project directory with its corresponding universal environment variable: '$PROJECT_DIR'. 
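                # (Illustrative, hypothetical path) e.g. '/tmp/clusterrunner_build/my_repo/tests/test_foo.py'
                # would become '$PROJECT_DIR/tests/test_foo.py'.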
45 |                 atom_value = atom_value.replace(project_type.project_directory, '$PROJECT_DIR')
46 |                 new_atoms.append(Atom(get_environment_variable_setter_command(atomizer_var_name, atom_value)))
47 |             atoms_list.extend(new_atoms)
48 | 
49 |         return atoms_list
50 | 
51 | 
52 | class AtomizerError(Exception):
53 |     """
54 |     Represents an error during atomization.
55 |     """
56 | 
--------------------------------------------------------------------------------
/app/master/build_request.py:
--------------------------------------------------------------------------------
1 | from app.util import util
2 | 
3 | 
4 | class BuildRequest(object):
5 |     """
6 |     This class is a data object for the build request parameters provided by the user. It additionally provides
7 |     validation.
8 | 
9 |     The request must make it possible to determine where the cluster runner configuration file
10 |     (clusterrunner.yaml) lives. The cluster runner will look in the top level project directory
11 |     for this file. If it doesn't exist, it is a fatal error and the build will be immediately aborted.
12 | 
13 |     The only field that is consistently required for ALL types is the "type" field.
14 | 
15 |     Currently we only support the 'git' project type.
16 | 
17 |     Git repo:
18 |     {
19 |         "type": "git",
20 |         "url": "ssh://github.com/drobertduke/some-project",
21 |         "branch": "master",
22 |         [OPTIONAL] "job_name": "clusterrunner_configured_job",
23 |             - This field is optional if the "config" section is specified
24 |         [OPTIONAL] "atoms_override": ['export VAR="overridden_atom_value_1";', ...],
25 |         [OPTIONAL] "hash": "123456789123456789123456789"
26 |         [OPTIONAL] "config": {
27 |             "commands" : [...],
28 |             "atomizers" : {...},
29 |             "setup_build": [...],
30 |             "teardown_build": [...],
31 |             "max_executors": ...,
32 |             "max_executors_per_slave": ...
33 |         }
34 |     }
35 |     """
36 |     def __init__(self, build_parameters):
37 |         """
38 |         :param build_parameters: A dictionary of request parameters
39 |         :type build_parameters: dict[str, str]
40 |         """
41 |         self._build_parameters = dict(build_parameters) or {}
42 |         build_type = self._build_parameters.get('type')
43 |         self._build_type = build_type.lower() if build_type else None
44 | 
45 |     def is_valid(self):
46 |         """
47 |         Validate the request arguments to make sure the user has provided enough information and that it is valid.
48 | 49 | :return: whether the parameters are valid or not 50 | :rtype: bool 51 | """ 52 | if self._build_type is None: 53 | return False 54 | missing_parameters = set(self.required_parameters()) - self._build_parameters.keys() 55 | return self.is_valid_type() and not missing_parameters 56 | 57 | def is_valid_type(self): 58 | """ 59 | :return: whether the type is valid or not 60 | :rtype: bool 61 | """ 62 | if self._build_type is None: 63 | return False 64 | return util.get_project_type_subclass(self._build_type) is not None 65 | 66 | def required_parameters(self): 67 | """ 68 | :return: a list of the required parameters for this type of build 69 | :rtype: list[str] 70 | """ 71 | project_type_class = util.get_project_type_subclass(self._build_type) 72 | if project_type_class: 73 | return project_type_class.required_constructor_argument_names() 74 | 75 | return [] 76 | 77 | def build_parameters(self): 78 | """ 79 | :return: the build parameters 80 | :rtype: dict 81 | """ 82 | return self._build_parameters 83 | -------------------------------------------------------------------------------- /app/master/build_scheduler_pool.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | from threading import Lock 3 | 4 | from app.master.build_scheduler import BuildScheduler 5 | 6 | 7 | class BuildSchedulerPool(object): 8 | """ 9 | A BuildSchedulerPool creates and manages a group of BuildScheduler instances. 10 | Since there is a one-to-one relationship between Build and BuildScheduler, this 11 | class exists to make it easier to create and manage scheduler instances. 12 | """ 13 | def __init__(self): 14 | self._schedulers_by_build_id = {} 15 | self._scheduler_creation_lock = Lock() 16 | self._builds_waiting_for_slaves = Queue() 17 | 18 | def get(self, build): 19 | """ 20 | :type build: Build 21 | :rtype: BuildScheduler 22 | """ 23 | with self._scheduler_creation_lock: 24 | scheduler = self._schedulers_by_build_id.get(build.build_id()) 25 | if scheduler is None: 26 | # WIP(joey): clean up old schedulers (search through list and remove any with finished builds) 27 | scheduler = BuildScheduler(build, self) 28 | self._schedulers_by_build_id[build.build_id()] = scheduler 29 | 30 | return scheduler 31 | 32 | def next_prepared_build_scheduler(self): 33 | """ 34 | Get the scheduler for the next build that has successfully completed build preparation. 35 | 36 | This is a blocking call--if there are no more builds that have completed build preparation and this 37 | method gets invoked, the execution will hang until the next build has completed build preparation. 38 | 39 | :rtype: BuildScheduler 40 | """ 41 | build = self._builds_waiting_for_slaves.get() 42 | return self.get(build) 43 | 44 | def add_build_waiting_for_slaves(self, build): 45 | """ 46 | :type build: app.master.build.Build 47 | """ 48 | self._builds_waiting_for_slaves.put(build) 49 | -------------------------------------------------------------------------------- /app/master/build_store.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from itertools import islice 3 | from typing import List 4 | 5 | from app.master.build import Build 6 | from app.util.exceptions import ItemNotFoundError 7 | 8 | 9 | class BuildStore: 10 | """ 11 | Build storage service that stores and handles all builds. 
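    Builds are kept in memory in an OrderedDict keyed by build id, so get_range() returns them
    in insertion order.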
12 | """ 13 | _all_builds_by_id = OrderedDict() 14 | 15 | @classmethod 16 | def get(cls, build_id: int) -> Build: 17 | """ 18 | Returns a build by id 19 | :param build_id: The id for the build whose status we are getting 20 | """ 21 | build = cls._all_builds_by_id.get(build_id) 22 | if build is None: 23 | raise ItemNotFoundError('Invalid build id: {}.'.format(build_id)) 24 | 25 | return build 26 | 27 | @classmethod 28 | def get_range(cls, start: int, end: int) -> List[Build]: 29 | """ 30 | Returns a list of all builds. 31 | :param start: The starting index of the requested build 32 | :param end: 1 + the index of the last requested element, although if this is greater than the total number 33 | of builds available the length of the returned list may be smaller than (end - start) 34 | """ 35 | requested_builds = islice(cls._all_builds_by_id, start, end) 36 | return [cls._all_builds_by_id[key] for key in requested_builds] 37 | 38 | @classmethod 39 | def add(cls, build: Build): 40 | """ 41 | Add new build to collection 42 | :param build: The build to add to the store 43 | """ 44 | cls._all_builds_by_id[build.build_id()] = build 45 | 46 | @classmethod 47 | def size(cls) -> int: 48 | """ 49 | Return the amount of builds within the store 50 | """ 51 | return len(cls._all_builds_by_id) 52 | -------------------------------------------------------------------------------- /app/master/cluster_runner_config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | from app.master.job_config import JobConfig 4 | from app.util import log 5 | 6 | 7 | class ClusterRunnerConfig(object): 8 | """ 9 | This class represents all of the ClusterRunner job definitions that live inside a single clusterrunner.yaml file. 10 | """ 11 | def __init__(self, raw_yaml_contents): 12 | """ 13 | :param raw_yaml_contents: Raw string contents of project clusterrunner.yaml file 14 | :type raw_yaml_contents: string 15 | """ 16 | self._job_configs = None 17 | self._logger = log.get_logger(__name__) 18 | self._raw_yaml_contents = raw_yaml_contents 19 | 20 | def get_job_config(self, job_name=None): 21 | """ 22 | Get a list of job configs contained in this cluster runner config, optionally filtered by job names. 23 | :param job_name: 24 | :type job_name: str | None 25 | :return: The specified job config 26 | :rtype: JobConfig 27 | """ 28 | if self._job_configs is None: 29 | self._parse_raw_config() 30 | 31 | if job_name is not None: 32 | if job_name not in self._job_configs: 33 | raise JobNotFoundError('The job "{}" was not found in the loaded config. ' 34 | 'Valid jobs are: {}'.format(job_name, self.get_job_names())) 35 | return self._job_configs[job_name] 36 | 37 | if len(self._job_configs) == 1: 38 | return list(self._job_configs.values())[0] 39 | 40 | raise JobNotSpecifiedError('Multiple jobs are defined in this project but you did not specify one. ' 41 | 'Specify one of the following job names: {}'.format(self.get_job_names())) 42 | 43 | def get_job_names(self): 44 | """ 45 | Get the names of all the jobs defined in the associated config file. 46 | :return: A list of all job names in the config file 47 | :rtype: list[str] 48 | """ 49 | if self._job_configs is None: 50 | self._parse_raw_config() 51 | 52 | return list(self._job_configs.keys()) 53 | 54 | def _parse_raw_config(self): 55 | """ 56 | Validate the parsed yaml structure. This method raises on validation errors. 57 | 58 | If validation is successful, add the job configs to this class instance. 
59 | 
60 |         The config is parsed from the raw yaml contents that this instance was initialized with;
61 |         this method takes no arguments.
62 |         """
63 |         config = yaml.safe_load(self._raw_yaml_contents)
64 | 
65 |         if not isinstance(config, dict):
66 |             raise ConfigParseError('The yaml config file could not be parsed to a dictionary')
67 | 
68 |         self._job_configs = {}
69 | 
70 |         for job_name, job_config_sections in config.items():
71 |             self._job_configs[job_name] = JobConfig.construct_from_dict(job_name, job_config_sections)
72 | 
73 |         if len(self._job_configs) == 0:
74 |             raise ConfigParseError('No jobs found in the config.')
75 | 
76 | 
77 | class ConfigParseError(Exception):
78 |     """
79 |     The cluster runner config could not be parsed
80 |     """
81 | 
82 | 
83 | class JobNotFoundError(Exception):
84 |     """
85 |     The requested job could not be found in the config
86 |     """
87 | 
88 | 
89 | class JobNotSpecifiedError(Exception):
90 |     """
91 |     Multiple jobs were found in the config but none were specified
92 |     """
93 | 
--------------------------------------------------------------------------------
/app/master/slave_allocator.py:
--------------------------------------------------------------------------------
1 | from app.util.log import get_logger
2 | from app.util.ordered_set_queue import OrderedSetQueue
3 | from app.util.safe_thread import SafeThread
4 | 
5 | from app.master.slave import SlaveMarkedForShutdownError
6 | 
7 | 
8 | class SlaveAllocator(object):
9 |     """
10 |     The SlaveAllocator class is responsible for allocating slaves to prepared builds.
11 |     """
12 | 
13 |     def __init__(self, scheduler_pool):
14 |         """
15 |         :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
16 |         """
17 |         self._logger = get_logger(__name__)
18 |         self._scheduler_pool = scheduler_pool
19 |         self._idle_slaves = OrderedSetQueue()
20 |         self._allocation_thread = SafeThread(
21 |             target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
22 | 
23 |     def start(self):
24 |         """
25 |         Start the infinite loop that will pull prepared builds off a synchronized queue
26 |         and allocate slaves to them.
27 |         """
28 |         if self._allocation_thread.is_alive():
29 |             raise RuntimeError('Error: slave allocation loop was asked to start when it\'s already running.')
30 |         self._allocation_thread.start()
31 | 
32 |     def _slave_allocation_loop(self):
33 |         """
34 |         Builds wait in line for more slaves. This method executes in the background on another thread and
35 |         watches for idle slaves, then gives them out to the waiting builds.
36 |         """
37 |         while True:
38 |             # This is a blocking call that will block until there is a prepared build.
39 |             build_scheduler = self._scheduler_pool.next_prepared_build_scheduler()
40 | 
41 |             while build_scheduler.needs_more_slaves():
42 |                 claimed_slave = self._idle_slaves.get()
43 | 
44 |                 # Remove dead and shutdown slaves from the idle queue
45 |                 if claimed_slave.is_shutdown() or not claimed_slave.is_alive(use_cached=False):
46 |                     continue
47 | 
48 |                 # The build may have completed while we were waiting for an idle slave, so check one more time.
49 |                 if build_scheduler.needs_more_slaves():
50 |                     # Potential race condition here! If the build completes after the if statement is checked,
51 |                     # a slave will be allocated needlessly (and run slave.setup(), which can be significant work).
52 | self._logger.info('Allocating {} to build {}.', claimed_slave, build_scheduler.build_id) 53 | build_scheduler.allocate_slave(claimed_slave) 54 | else: 55 | self.add_idle_slave(claimed_slave) 56 | 57 | self._logger.info('Done allocating slaves for build {}.', build_scheduler.build_id) 58 | 59 | def add_idle_slave(self, slave): 60 | """ 61 | Add a slave to the idle queue. 62 | 63 | :type slave: Slave 64 | """ 65 | try: 66 | slave.mark_as_idle() 67 | self._idle_slaves.put(slave) 68 | except SlaveMarkedForShutdownError: 69 | pass 70 | -------------------------------------------------------------------------------- /app/master/subjob_calculator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from app.master.atom import Atom 5 | from app.master.atom_grouper import AtomGrouper 6 | from app.master.subjob import Subjob 7 | from app.master.time_based_atom_grouper import TimeBasedAtomGrouper 8 | from app.util import log 9 | 10 | 11 | def compute_subjobs_for_build(build_id, job_config, project_type): 12 | """ 13 | Calculate subjobs for a build. 14 | :type build_id: int 15 | :type job_config: JobConfig 16 | :param project_type: the project_type that the build is running in 17 | :type project_type: project_type.project_type.ProjectType 18 | :rtype: list[Subjob] 19 | """ 20 | # Users can override the list of atoms to be run in this build. If the atoms_override 21 | # was specified, we can skip the atomization step and use those overridden atoms instead. 22 | if project_type.atoms_override is not None: 23 | atoms_string_list = project_type.atoms_override 24 | atoms_list = [Atom(atom_string_value) for atom_string_value in atoms_string_list] 25 | else: 26 | atoms_list = job_config.atomizer.atomize_in_project(project_type) 27 | 28 | # Group the atoms together using some grouping strategy 29 | timing_file_path = project_type.timing_file_path(job_config.name) 30 | grouped_atoms = _grouped_atoms( 31 | atoms_list, 32 | job_config.max_executors, 33 | timing_file_path, 34 | project_type.project_directory 35 | ) 36 | 37 | # Generate subjobs for each group of atoms 38 | subjobs = [] 39 | for subjob_id, subjob_atoms in enumerate(grouped_atoms): 40 | # The atom id isn't calculated until the atom has been grouped into a subjob. 41 | for atom_id, atom in enumerate(subjob_atoms): 42 | atom.id = atom_id 43 | subjobs.append(Subjob(build_id, subjob_id, project_type, job_config, subjob_atoms)) 44 | return subjobs 45 | 46 | 47 | def _grouped_atoms(atoms, max_executors, timing_file_path, project_directory): 48 | """ 49 | Return atoms that are grouped for optimal CI performance. 50 | 51 | If a timing file exists, then use the TimeBasedAtomGrouper. 52 | If not, use the default AtomGrouper (groups each atom into its own subjob). 
53 | 54 | :param atoms: all of the atoms to be run this time 55 | :type atoms: list[app.master.atom.Atom] 56 | :param max_executors: the maximum number of executors for this build 57 | :type max_executors: int 58 | :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job 59 | :type timing_file_path: str 60 | :type project_directory: str 61 | :return: the grouped atoms (in the form of list of lists of strings) 62 | :rtype: list[list[app.master.atom.Atom]] 63 | """ 64 | atom_time_map = None 65 | 66 | if os.path.isfile(timing_file_path): 67 | with open(timing_file_path, 'r') as json_file: 68 | try: 69 | atom_time_map = json.load(json_file) 70 | except ValueError: 71 | logger = log.get_logger(__name__) 72 | logger.warning('Failed to load timing data from file that exists {}', timing_file_path) 73 | 74 | if atom_time_map is not None and len(atom_time_map) > 0: 75 | atom_grouper = TimeBasedAtomGrouper(atoms, max_executors, atom_time_map, project_directory) 76 | else: 77 | atom_grouper = AtomGrouper(atoms, max_executors) 78 | 79 | return atom_grouper.groupings() 80 | -------------------------------------------------------------------------------- /app/project_type/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/project_type/__init__.py -------------------------------------------------------------------------------- /app/project_type/directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from platform import node 3 | 4 | from app.project_type.project_type import ProjectType 5 | from app.util.conf.configuration import Configuration 6 | from app.util.log import get_logger 7 | 8 | 9 | class Directory(ProjectType): 10 | """ 11 | Example API call to invoke a directory-type build. 12 | { 13 | "type": "directory", 14 | "project_directory": "examples/directory job", 15 | } 16 | """ 17 | def __init__(self, project_directory, config=None, job_name=None, build_project_directory=None, 18 | remote_files=None): 19 | """ 20 | Note: the first line of each parameter docstring will be exposed as command line argument documentation for the 21 | clusterrunner build client. 22 | 23 | :param project_directory: path to the directory that contains the project and clusterrunner.yaml 24 | :type project_directory: string 25 | :param config: a yaml string to be used in place of a clusterrunner.yaml 26 | :type config: string|None 27 | :param job_name: a list of job names we intend to run 28 | :type job_name: list [str] | None 29 | :param remote_files: dictionary mapping of output file to URL 30 | :type remote_files: dict[str, str] | None 31 | """ 32 | super().__init__(config, job_name, remote_files) 33 | self._logger = get_logger(__name__) 34 | self.project_directory = os.path.abspath(project_directory) 35 | self._logger.debug('Project directory is {}'.format(project_directory)) 36 | 37 | def _fetch_project(self): 38 | dir_exists = os.path.isdir(self.project_directory) 39 | if not dir_exists: 40 | raise RuntimeError('Could not find the directory "{}" on {}. Note that if you are running ClusterRunner ' 41 | 'on multiple hosts, "directory" type builds are not supported.' 42 | .format(self.project_directory, node())) 43 | 44 | def execute_command_in_project(self, *args, **kwargs): 45 | """ 46 | Execute a command inside the directory. See superclass for parameter documentation. 
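        If no working directory is given in kwargs, the command's cwd defaults to the project directory.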
47 | """ 48 | if 'cwd' not in kwargs: 49 | kwargs['cwd'] = self.project_directory 50 | return super().execute_command_in_project(*args, **kwargs) 51 | 52 | def timing_file_path(self, job_name): 53 | """ 54 | Construct the sys path of the directory where the timing file should reside based on the project_directory. 55 | project_directory is the sys path of the project which contains the clusterrunner.yaml file. 56 | 57 | e.g.: 58 | Configuration['timings_directory'] = '/var/timings_directory' 59 | project_directory = '/Users/me/project' 60 | 61 | The final timing file sys path should be: 62 | '/var/timings_directory/Users/me/project' 63 | 64 | :type job_name: str 65 | :return: the absolute path to where the timing file for job_name SHOULD be. This method does not guarantee 66 | that the timing file exists. 67 | :rtype: string 68 | """ 69 | # cut off mount point and leading separator (e.g. '/' on POSIX or '\\' on Windows) 70 | # e.g. '/var/bar' would become 'var/bar' on POSIX and 'c:\\temp\\foo' would become 'temp\\foo' 71 | timings_subdirectory = os.path.splitdrive(self.project_directory)[1][len(os.sep):] 72 | return os.path.join( 73 | Configuration['timings_directory'], 74 | timings_subdirectory, 75 | '{}.timing.json'.format(job_name) 76 | ) 77 | 78 | def project_id(self): 79 | return self.project_directory 80 | -------------------------------------------------------------------------------- /app/slave/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/slave/__init__.py -------------------------------------------------------------------------------- /app/subcommands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/subcommands/__init__.py -------------------------------------------------------------------------------- /app/subcommands/build_subcommand.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from app.client.build_runner import BuildRunner 5 | from app.client.service_runner import ServiceRunner, ServiceRunError 6 | from app.subcommands.subcommand import Subcommand 7 | from app.util import log 8 | from app.util.conf.configuration import Configuration 9 | from app.util.network import Network 10 | from app.util.secret import Secret 11 | 12 | 13 | class BuildSubcommand(Subcommand): 14 | 15 | def run(self, log_level, master_url, remote_file=None, build_type=None, **request_params): 16 | """ 17 | Execute a build and wait for it to complete. 18 | 19 | :param log_level: the log level at which to do application logging (or None for default log level) 20 | :type log_level: str | None 21 | :param master_url: the url (specified by the user) of the master to which we should send the build 22 | :type master_url: str | None 23 | :param remote_file: a list of remote files where each element contains the output file name and the resource URL 24 | :type remote_file: list[list[str]] | None 25 | :param build_type: the build type of the request to be sent (e.g., "git", "directory"). If not specified 26 | will default to the "directory" project type. 
27 | :type build_type: str | None 28 | :param request_params: key-value pairs to be provided as build parameters in the build request 29 | :type request_params: dict 30 | """ 31 | log_level = log_level or Configuration['log_level'] 32 | log.configure_logging(log_level=log_level, simplified_console_logs=True) 33 | request_params['type'] = build_type or request_params.get('type') or 'directory' 34 | 35 | if remote_file: 36 | request_params['remote_files'] = {name: url for name, url in remote_file} 37 | 38 | operational_master_url = master_url or '{}:{}'.format(Configuration['hostname'], Configuration['port']) 39 | 40 | # If running a single master, single slave--both on localhost--we need to launch services locally. 41 | if master_url is None and Network.are_hosts_same(Configuration['master_hostname'], 'localhost') \ 42 | and len(Configuration['slaves']) == 1 \ 43 | and Network.are_hosts_same(Configuration['slaves'][0], 'localhost'): 44 | self._start_local_services_if_needed(operational_master_url) 45 | 46 | if request_params['type'] == 'directory': 47 | request_params['project_directory'] = request_params.get('project_directory') or os.getcwd() 48 | 49 | runner = BuildRunner(master_url=operational_master_url, request_params=request_params, secret=Secret.get()) 50 | 51 | if not runner.run(): 52 | sys.exit(1) 53 | 54 | def _start_local_services_if_needed(self, master_url): 55 | """ 56 | In the case that: 57 | 58 | - the master url is localhost 59 | - the slaves list is just localhost 60 | 61 | Start a master and slave service instance locally, if the master is not already running. 62 | 63 | :param master_url: service url (with port number) 64 | :type master_url: str 65 | """ 66 | service_runner = ServiceRunner(master_url) 67 | if service_runner.is_master_up(): 68 | return 69 | try: 70 | service_runner.run_master() 71 | service_runner.run_slave() 72 | except ServiceRunError as ex: 73 | self._logger.error(str(ex)) 74 | sys.exit(1) 75 | -------------------------------------------------------------------------------- /app/subcommands/master_subcommand.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from app.master.cluster_master import ClusterMaster 4 | from app.subcommands.service_subcommand import ServiceSubcommand 5 | from app.util import analytics, log 6 | from app.util.conf.configuration import Configuration 7 | from app.web_framework.cluster_master_application import ClusterMasterApplication 8 | 9 | 10 | class MasterSubcommand(ServiceSubcommand): 11 | _THREAD_NAME = 'MasterTornadoThread' 12 | 13 | def async_run(self, port, log_level, eventlog_file): 14 | """ 15 | Run a ClusterRunner master service. 
16 | 17 | :param port: the port on which to run the master service 18 | :type port: int | None 19 | :param log_level: the log level at which to do application logging (or None for default log level) 20 | :type log_level: str | None 21 | :param eventlog_file: an optional alternate file in which to write event logs 22 | :type eventlog_file: str | None 23 | """ 24 | port = port or Configuration['port'] 25 | log_level = log_level or Configuration['log_level'] 26 | eventlog_file = eventlog_file or Configuration['eventlog_file'] 27 | 28 | log.configure_logging(log_level=log_level, log_file=Configuration['log_file']) 29 | analytics.initialize(eventlog_file) 30 | analytics.record_event(analytics.SERVICE_STARTED, service='master') 31 | 32 | cluster_master = ClusterMaster() 33 | 34 | application = ClusterMasterApplication(cluster_master) 35 | 36 | ioloop = self._start_application(application, port) 37 | 38 | self._write_pid_file(Configuration['master_pid_file']) 39 | 40 | # log startup message once ioloop is running 41 | hostname = Configuration['hostname'] 42 | log_startup = functools.partial(self._logger.info, 'Master service is running on {}:{}.'.format(hostname, port)) 43 | ioloop.add_callback(log_startup) 44 | 45 | # start heartbeat tracker once ioloop starts 46 | start_master_heartbeat_tracker = functools.partial(cluster_master.start_heartbeat_tracker_thread) 47 | ioloop.add_callback(start_master_heartbeat_tracker) 48 | 49 | ioloop.start() # this call blocks until the server is stopped 50 | ioloop.close(all_fds=True) # all_fds=True is necessary here to make sure connections don't hang 51 | self._logger.notice('Master server was stopped.') 52 | -------------------------------------------------------------------------------- /app/subcommands/service_subcommand.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | import sys 4 | import tornado.ioloop 5 | from tornado.httpserver import HTTPServer 6 | 7 | from app.util.conf.configuration import Configuration 8 | from app.subcommands.subcommand import Subcommand 9 | from app.util import fs 10 | from app.util.safe_thread import SafeThread 11 | from app.util.unhandled_exception_handler import UnhandledExceptionHandler 12 | 13 | 14 | class ServiceSubcommand(Subcommand): 15 | """ 16 | Base class for Master and Slave subcommands. 17 | """ 18 | _THREAD_NAME = None 19 | 20 | def run(self, *args, **kwargs): 21 | app_thread = SafeThread( 22 | name=self._THREAD_NAME, 23 | target=self.async_run, 24 | args=args, 25 | kwargs=kwargs, 26 | ) 27 | app_thread.start() 28 | app_thread.join() 29 | 30 | def async_run(self, *args, **kwargs): 31 | raise NotImplementedError 32 | 33 | def _start_application(self, application, port): 34 | # Note: No significant application logic should be executed before this point. The call to server.listen() 35 | # will raise an exception if another process is using the same port. We rely on this exception to force us to 36 | # exit if there are any port conflicts. 37 | try: 38 | # If SSL cert and key files are provided in configuration, ClusterRunner will start with HTTPS protocol. 39 | # Otherwise ClusterRunner will start with HTTP protocol. 40 | server = HTTPServer(application, ssl_options=self._get_https_options()) 41 | server.listen(port, '0.0.0.0') 42 | except OSError: 43 | self._logger.error('Could not start application on port {}.
Is port already in use?'.format(port)) 44 | sys.exit(1) 45 | 46 | ioloop = tornado.ioloop.IOLoop.instance() 47 | 48 | # add a teardown callback that will stop the tornado server 49 | stop_tornado_ioloop = functools.partial(ioloop.add_callback, callback=ioloop.stop) 50 | UnhandledExceptionHandler.singleton().add_teardown_callback(stop_tornado_ioloop) 51 | return ioloop 52 | 53 | def _write_pid_file(self, filename): 54 | fs.write_file(str(os.getpid()), filename) 55 | 56 | def remove_pid_file(): 57 | try: 58 | os.remove(filename) 59 | except OSError: 60 | pass 61 | UnhandledExceptionHandler.singleton().add_teardown_callback(remove_pid_file) 62 | 63 | def _get_https_options(self): 64 | https_cert_file = Configuration['https_cert_file'] 65 | https_key_file = Configuration['https_key_file'] 66 | 67 | if https_cert_file and https_key_file: 68 | return { 69 | 'certfile': https_cert_file, 70 | 'keyfile': https_key_file, 71 | } 72 | return None 73 | -------------------------------------------------------------------------------- /app/subcommands/shutdown_subcommand.py: -------------------------------------------------------------------------------- 1 | from app.client.cluster_api_client import ClusterMasterAPIClient 2 | from app.subcommands.subcommand import Subcommand 3 | from app.util import log 4 | from app.util.conf.configuration import Configuration 5 | 6 | 7 | class ShutdownSubcommand(Subcommand): 8 | 9 | def run(self, log_level, master_url, slave_ids=None, all_slaves=False, **request_params): 10 | log_level = log_level or Configuration['log_level'] 11 | log.configure_logging(log_level=log_level, simplified_console_logs=True) 12 | 13 | master_url = master_url or '{}:{}'.format(Configuration['hostname'], Configuration['port']) 14 | client = ClusterMasterAPIClient(master_url) 15 | if all_slaves: 16 | client.graceful_shutdown_all_slaves() 17 | elif slave_ids and len(slave_ids) > 0: 18 | client.graceful_shutdown_slaves_by_id(slave_ids) 19 | else: 20 | self._logger.error('No slaves specified to shutdown.') 21 | exit(1) 22 | -------------------------------------------------------------------------------- /app/subcommands/slave_subcommand.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from app.slave.cluster_slave import ClusterSlave 4 | from app.subcommands.service_subcommand import ServiceSubcommand 5 | from app.util import analytics, log 6 | from app.util.conf.configuration import Configuration 7 | from app.web_framework.cluster_slave_application import ClusterSlaveApplication 8 | 9 | 10 | class SlaveSubcommand(ServiceSubcommand): 11 | _THREAD_NAME = 'SlaveTornadoThread' 12 | 13 | def async_run(self, port, master_url, num_executors, log_level, eventlog_file): 14 | """ 15 | Run a ClusterRunner slave service. 
16 | 17 | :param port: the port on which to run the slave service 18 | :type port: int | None 19 | :param master_url: the url of the master to which this slave should attach 20 | :type master_url: string | None 21 | :param num_executors: the number of executors the slave service should use 22 | :type num_executors: int | None 23 | :param log_level: the log level at which to do application logging (or None for default log level) 24 | :type log_level: str | None 25 | :param eventlog_file: an optional alternate file in which to write event logs 26 | :type eventlog_file: str | None 27 | """ 28 | num_executors = num_executors or Configuration['num_executors'] 29 | master_url = master_url or '{}:{}'.format(Configuration['master_hostname'], Configuration['master_port']) 30 | port = port or Configuration['port'] 31 | log_level = log_level or Configuration['log_level'] 32 | eventlog_file = eventlog_file or Configuration['eventlog_file'] 33 | 34 | log.configure_logging(log_level=log_level, log_file=Configuration['log_file'].format(port)) 35 | analytics.initialize(eventlog_file) 36 | analytics.record_event(analytics.SERVICE_STARTED, service='slave') 37 | 38 | cluster_slave = ClusterSlave( 39 | port=port, 40 | num_executors=num_executors, 41 | host=Configuration['hostname'], 42 | ) 43 | 44 | application = ClusterSlaveApplication(cluster_slave) 45 | 46 | ioloop = self._start_application(application, port) 47 | 48 | self._write_pid_file(Configuration['slave_pid_file']) 49 | 50 | # connect to master once tornado ioloop is running 51 | connect_slave_to_master = functools.partial(cluster_slave.connect_to_master, master_url=master_url) 52 | ioloop.add_callback(connect_slave_to_master) 53 | 54 | # start sending heartbeat after connecting to master 55 | start_slave_heartbeat = functools.partial(cluster_slave.start_heartbeat_thread) 56 | ioloop.add_callback(start_slave_heartbeat) 57 | 58 | ioloop.start() # this call blocks until the server is stopped 59 | ioloop.close(all_fds=True) # all_fds=True is necessary here to make sure connections don't hang 60 | self._logger.notice('Slave server was stopped.') 61 | -------------------------------------------------------------------------------- /app/subcommands/subcommand.py: -------------------------------------------------------------------------------- 1 | from app.util import log 2 | 3 | 4 | class Subcommand(object): 5 | 6 | def __init__(self): 7 | self._logger = log.get_logger(__name__) 8 | 9 | def run(self, *args, **kwargs): 10 | raise NotImplementedError 11 | -------------------------------------------------------------------------------- /app/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/util/__init__.py -------------------------------------------------------------------------------- /app/util/analytics.py: -------------------------------------------------------------------------------- 1 | from app.util.event_log import EventLog 2 | from app.util.exceptions import ItemNotReadyError 3 | 4 | BUILD_REQUEST_QUEUED = 'BUILD_REQUEST_QUEUED' 5 | BUILD_PREPARE_START = 'BUILD_PREPARE_START' 6 | BUILD_PREPARE_FINISH = 'BUILD_PREPARE_FINISH' 7 | BUILD_SETUP_START = 'BUILD_SETUP_START' 8 | BUILD_SETUP_FINISH = 'BUILD_SETUP_FINISH' 9 | MASTER_RECEIVED_RESULT = 'MASTER_RECEIVED_RESULT' 10 | MASTER_TRIGGERED_SUBJOB = 'MASTER_TRIGGERED_SUBJOB' 11 | SERVICE_STARTED = 'SERVICE_STARTED' 12 | SUBJOB_EXECUTION_FINISH = 
'SUBJOB_EXECUTION_FINISH' 13 | SUBJOB_EXECUTION_START = 'SUBJOB_EXECUTION_START' 14 | ATOM_START = 'ATOM_START' 15 | ATOM_FINISH = 'ATOM_FINISH' 16 | 17 | _event_log = None 18 | 19 | 20 | def initialize(eventlog_file=None): 21 | """ 22 | Initialize the analytics output. This will cause analytics events to be output to either a file or stdout. 23 | 24 | If this function is not called, analytics events will not be output. If it is called with a filename, the events 25 | will be output to that file. If it is called with 'STDOUT' or None, the events will be output to stdout. 26 | 27 | :param eventlog_file: The filename to output events to, 'STDOUT' to output to stdout, None to disable event logging 28 | :type eventlog_file: str | None 29 | """ 30 | global _event_log 31 | 32 | _event_log = EventLog(filename=eventlog_file) 33 | 34 | 35 | def record_event(tag, log_msg=None, **event_data): 36 | """ 37 | Record an event containing the specified data. Currently this just json-ifies the event and outputs it to the 38 | configured analytics logger (see analytics.initialize()). 39 | 40 | :param tag: A string identifier that describes the event being logged (e.g., "REQUEST_SENT") 41 | :type tag: str 42 | :param log_msg: A message that will also be logged to the human-readable log (not the event log). It will be string 43 | formatted with the event_data dict. This is a convenience for logging to both human- and machine-readable logs. 44 | :type log_msg: str 45 | :param event_data: Free-form key value pairs that make up the event 46 | :type event_data: dict 47 | """ 48 | if _event_log: 49 | _event_log.record_event(tag, log_msg=log_msg, **event_data) 50 | 51 | 52 | def get_events(since_timestamp=None, since_id=None): 53 | """ 54 | Retrieve all events from the current eventlog since the given timestamp or event id. This is used to expose events 55 | via the API and is useful for building dashboards that monitor the system. 56 | 57 | :param since_timestamp: Get all events after (greater than) this timestamp 58 | :type since_timestamp: float | None 59 | :param since_id: Get all events after (greater than) this id 60 | :type since_id: int | None 61 | :return: The list of events in the given range 62 | :rtype: list[dict] | None 63 | """ 64 | if _event_log: 65 | since_timestamp = float(since_timestamp) if since_timestamp else since_timestamp 66 | since_id = int(since_id) if since_id else since_id 67 | return _event_log.get_events(since_timestamp=since_timestamp, since_id=since_id) 68 | else: 69 | raise ItemNotReadyError('Analytics was not initialized. Call initialize first') 70 | -------------------------------------------------------------------------------- /app/util/app_info.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import threading 3 | import traceback 4 | 5 | 6 | def get_app_info_string(): 7 | """ 8 | Get a string representing global information about the application. This is used for debugging. 9 | 10 | :rtype: str 11 | """ 12 | app_info_list = _get_formatted_thread_stack_traces() 13 | return '\n'.join(app_info_list) 14 | 15 | 16 | def _get_formatted_thread_stack_traces(): 17 | """ 18 | Get the formatted stack trace string for each currently running thread. 19 | 20 | :rtype: list[str] 21 | """ 22 | formatted_traces = [] 23 | threads_by_id = {thread.ident: thread for thread in threading.enumerate()} 24 | 25 | # The sys_current_frames() method is intended to be used for debugging like this. 
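# (sys._current_frames() returns a dict mapping each running thread's identifier to its topmost stack frame, which is why the protected-access pylint warning is suppressed on the call below.)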
26 | for thread_id, stack in sys._current_frames().items(): # pylint: disable=protected-access 27 | thread = threads_by_id.get(thread_id) 28 | if thread: 29 | thread_type = 'daemon' if thread.isDaemon() else 'nondaemon' 30 | thread_stack_trace = ''.join(traceback.format_stack(stack)) 31 | formatted_traces.append('Current trace for {} thread "{}":\n{}' 32 | .format(thread_type, thread.name, thread_stack_trace)) 33 | 34 | return formatted_traces 35 | -------------------------------------------------------------------------------- /app/util/conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/util/conf/__init__.py -------------------------------------------------------------------------------- /app/util/conf/config_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import stat 3 | 4 | from configobj import ConfigObj 5 | 6 | from app.util import fs 7 | from app.util.process_utils import is_windows 8 | 9 | 10 | class ConfigFile(object): 11 | CONFIG_FILE_MODE = stat.S_IRUSR | stat.S_IWUSR 12 | 13 | def __init__(self, filename): 14 | """ 15 | :type filename: str 16 | """ 17 | self._filename = filename 18 | 19 | def read_config_from_disk(self): 20 | """ 21 | Parse an INI-style config file from disk. 22 | """ 23 | if not os.path.isfile(self._filename): 24 | raise FileNotFoundError('Conf file {} does not exist'.format(self._filename)) 25 | file_mode = stat.S_IMODE(os.stat(self._filename).st_mode) 26 | if not is_windows() and file_mode != self.CONFIG_FILE_MODE: 27 | raise PermissionError('The conf file {} has incorrect permissions, ' 28 | 'should be 0600 for security reasons'.format(self._filename)) 29 | config_parsed = ConfigObj(self._filename) 30 | return config_parsed 31 | 32 | def write_value(self, name, value, section): 33 | """ 34 | Update this file by writing a single value to a section of a configuration file. 35 | :type name: str 36 | :type value: str 37 | :type section: str 38 | """ 39 | config_parsed = self.read_config_from_disk() 40 | config_parsed[section][name] = value 41 | self._write_config_to_disk(config_parsed) 42 | 43 | def _write_config_to_disk(self, config_parsed): 44 | """ 45 | Write a data structure of parsed config values to disk in an INI-style format. 
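The containing directory is created if it does not already exist, and the file mode is restricted to owner read/write (0600) after writing.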
46 | :type config_parsed: ConfigObj 47 | """ 48 | fs.create_dir(os.path.dirname(self._filename)) 49 | config_parsed.write() 50 | os.chmod(self._filename, self.CONFIG_FILE_MODE) 51 | -------------------------------------------------------------------------------- /app/util/conf/configuration.py: -------------------------------------------------------------------------------- 1 | from app.util.singleton import Singleton 2 | 3 | 4 | class _ConfigurationMetaclass(type): 5 | """ 6 | Metaclass for Configuration class to allow keyed access on the singleton instance 7 | """ 8 | def __getitem__(cls, item): 9 | configuration = Configuration.singleton() 10 | return configuration.get(item) 11 | 12 | def __setitem__(cls, key, value): 13 | configuration = Configuration.singleton() 14 | configuration.set(key, value) 15 | 16 | def __contains__(cls, key): 17 | configuration = Configuration.singleton() 18 | return key in configuration.properties 19 | 20 | 21 | class Configuration(Singleton, metaclass=_ConfigurationMetaclass): 22 | """ 23 | The main singleton configuration class -- the default configuration is in conf.base_conf 24 | 25 | Access configuration settings using configuration keys: 26 | >>> app_name = Configuration['name'] 27 | """ 28 | 29 | def __init__(self, as_instance=False): 30 | """ 31 | :param as_instance: should this be instantiated as an instance variable? 32 | :type as_instance: bool 33 | :return: 34 | """ 35 | if not as_instance: 36 | super().__init__() 37 | self.properties = {} 38 | 39 | def set(self, name, value): 40 | self.properties[name] = value 41 | return self 42 | 43 | def get(self, name): 44 | return self.properties[name] 45 | -------------------------------------------------------------------------------- /app/util/conf/deploy_config_loader.py: -------------------------------------------------------------------------------- 1 | from app.util.conf.base_config_loader import BaseConfigLoader 2 | 3 | 4 | class DeployConfigLoader(BaseConfigLoader): 5 | 6 | def configure_defaults(self, conf): 7 | """ 8 | These are the slave configuration defaults. These values can override values in BaseConfigLoader. 9 | :type conf: Configuration 10 | """ 11 | super().configure_defaults(conf) 12 | conf.set('log_filename', 'clusterrunner_deploy.log') 13 | conf.set('log_level', 'INFO') 14 | -------------------------------------------------------------------------------- /app/util/conf/master_config_loader.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | 3 | from app.util.conf.base_config_loader import BaseConfigLoader 4 | 5 | 6 | class MasterConfigLoader(BaseConfigLoader): 7 | 8 | CONFIG_FILE_SECTION = 'master' 9 | 10 | def configure_defaults(self, conf): 11 | """ 12 | This is the master configuration. These values should override values in base_conf.py. 13 | 14 | :type conf: Configuration 15 | """ 16 | super().configure_defaults(conf) 17 | conf.set('port', 43000) 18 | conf.set('log_filename', 'clusterrunner_master.log') 19 | conf.set('eventlog_filename', 'eventlog_master.log') 20 | conf.set('shallow_clones', False) 21 | 22 | # Default values for heartbeat configuration 23 | conf.set('unresponsive_slaves_cleanup_interval', 600) 24 | 25 | def configure_postload(self, conf): 26 | """ 27 | After the clusterrunner.conf file has been loaded, generate the master-specific paths which descend from the 28 | base_directory. 
29 | :type conf: Configuration 30 | """ 31 | super().configure_postload(conf) 32 | base_directory = conf.get('base_directory') 33 | # where repos are cloned on the master 34 | conf.set('repo_directory', join(base_directory, 'repos', 'master')) 35 | # where the slave's result artifacts should be stored 36 | conf.set('artifact_directory', join(base_directory, 'artifacts')) 37 | 38 | log_dir = conf.get('log_dir') 39 | conf.set('log_file', join(log_dir, 'clusterrunner_master.log')) 40 | conf.set('eventlog_file', join(log_dir, 'eventlog_master.log')) 41 | # where to store results on the master 42 | conf.set('results_directory', join(base_directory, 'results', 'master')) 43 | conf.set('timings_directory', join(base_directory, 'timings', 'master')) # timing data 44 | -------------------------------------------------------------------------------- /app/util/conf/slave_config_loader.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | 3 | from app.util.conf.base_config_loader import BaseConfigLoader 4 | 5 | 6 | class SlaveConfigLoader(BaseConfigLoader): 7 | 8 | CONFIG_FILE_SECTION = 'slave' 9 | 10 | def configure_defaults(self, conf): 11 | """ 12 | These are the slave configuration defaults. These values can override values in BaseConfigLoader. 13 | :type conf: Configuration 14 | """ 15 | super().configure_defaults(conf) 16 | conf.set('port', 43001) 17 | conf.set('num_executors', 1) 18 | conf.set('log_filename', 'clusterrunner_slave.log') 19 | conf.set('eventlog_filename', 'eventlog_slave.log') 20 | conf.set('master_hostname', 'localhost') 21 | conf.set('master_port', 43000) 22 | conf.set('shallow_clones', True) 23 | # Use a longer timeout for slaves since we don't yet have request metrics on the slave side and since 24 | # slaves are more likely to encounter long response times on the master due to the master being a 25 | # centralized hub with a single-threaded server. 26 | conf.set('default_http_timeout', 120) 27 | 28 | # Default values for heartbeat configuration 29 | conf.set('heartbeat_interval', 60) 30 | conf.set('heartbeat_failure_threshold', 10) 31 | 32 | def configure_postload(self, conf): 33 | """ 34 | After the clusterrunner.conf file has been loaded, generate the slave-specific paths which descend from the 35 | base_directory. 36 | :type conf: Configuration 37 | """ 38 | super().configure_postload(conf) 39 | base_directory = conf.get('base_directory') 40 | # where repos are cloned on the slave 41 | conf.set('repo_directory', join(base_directory, 'repos', 'slave')) 42 | # where the slave's result artifacts should be stored 43 | conf.set('artifact_directory', join(base_directory, 'artifacts')) 44 | # where to store results on the slave 45 | conf.set('results_directory', join(base_directory, 'results', 'slave')) 46 | conf.set('timings_directory', join(base_directory, 'timings', 'master')) # timing data 47 | -------------------------------------------------------------------------------- /app/util/conf/stop_config_loader.py: -------------------------------------------------------------------------------- 1 | from app.util.conf.base_config_loader import BaseConfigLoader 2 | 3 | 4 | class StopConfigLoader(BaseConfigLoader): 5 | 6 | def configure_defaults(self, conf): 7 | """ 8 | These are the slave configuration defaults. These values can override values in BaseConfigLoader. 
9 | :type conf: Configuration 10 | """ 11 | super().configure_defaults(conf) 12 | conf.set('log_filename', 'clusterrunner_stop.log') 13 | conf.set('log_level', 'INFO') 14 | -------------------------------------------------------------------------------- /app/util/counter.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | 3 | 4 | class Counter(object): 5 | """ 6 | A thread-safe counter. 7 | """ 8 | def __init__(self, start=0, step=1): 9 | self._step = step 10 | self._counter = Queue(maxsize=1) 11 | self._counter.put(start) 12 | 13 | def increment(self): 14 | return self._change_current_value(self._step) 15 | 16 | def decrement(self): 17 | return self._change_current_value(-self._step) 18 | 19 | def value(self): 20 | return self._change_current_value(0) 21 | 22 | def _change_current_value(self, delta): 23 | i = self._counter.get() # will block until another thread finishes calling put 24 | self._counter.put(i + delta) 25 | return i + delta 26 | -------------------------------------------------------------------------------- /app/util/decorators.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import time 3 | 4 | from app.util import log 5 | from app.util.exceptions import AuthenticationError 6 | from app.util.secret import Secret 7 | 8 | 9 | def retry_on_exception_exponential_backoff(exceptions, initial_delay=0.1, total_delay=15, exponential_factor=2): 10 | """ 11 | Retries with exponential backoff. 12 | 13 | :param exceptions: The exceptions that we will catch and retry on. 14 | :type exceptions: list[Exception] 15 | :param initial_delay: num seconds that the first retry period will be 16 | :type initial_delay: float 17 | :param total_delay: the total number of seconds of the sum of all retry periods 18 | :type total_delay: float 19 | :param exponential_factor: Cannot be smaller than 1. 20 | :type exponential_factor: float 21 | """ 22 | def method_decorator(function): 23 | @wraps(function) 24 | def function_with_retries(*args, **kwargs): 25 | # If initial_delay is negative, then exponentiation would go infinitely. 26 | if initial_delay <= 0: 27 | raise RuntimeError('initial_delay must be greater than 0, was set to {}'.format(str(initial_delay))) 28 | 29 | # The exponential factor must be greater than 1. 30 | if exponential_factor <= 1: 31 | raise RuntimeError('exponential_factor, {}, must be greater than 1'.format(exponential_factor)) 32 | 33 | delay = initial_delay 34 | total_delay_so_far = 0 35 | 36 | while True: 37 | try: 38 | return function(*args, **kwargs) 39 | except exceptions as ex: 40 | if total_delay_so_far > total_delay: 41 | raise # final attempt failed 42 | log.get_logger(__name__).warning('Call to {} raised {}("{}"). Retrying in {} seconds.', 43 | function.__qualname__, type(ex).__name__, ex, delay) 44 | time.sleep(delay) 45 | total_delay_so_far += delay 46 | delay *= exponential_factor 47 | 48 | return function_with_retries 49 | return method_decorator 50 | 51 | 52 | def authenticated(function): 53 | """ 54 | Fail the request if the correct secret is not included in either the headers or the request body. This should be 55 | called on all mutation requests. 
(POST, PUT) 56 | """ 57 | @wraps(function) 58 | def function_with_auth(self, *args, **kwargs): 59 | header_digest = self.request.headers.get(Secret.DIGEST_HEADER_KEY) 60 | if not Secret.digest_is_valid(header_digest, self.encoded_body.decode('utf-8')): 61 | raise AuthenticationError('Message digest does not match header, message not authenticated.') 62 | 63 | return function(self, *args, **kwargs) 64 | 65 | return function_with_auth 66 | -------------------------------------------------------------------------------- /app/util/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class ItemNotReadyError(Exception): 4 | """ 5 | An exception to represent the case where something was not yet ready or does not yet exist, but will definitely 6 | exist at a future point. Example: trying to download the results for a build that has not finished. The web 7 | framework should translate this exception to a 202 response. 8 | """ 9 | 10 | 11 | class BadRequestError(Exception): 12 | """ 13 | An exception to represent the case where something in the request was bad or malformed. Example: bad JSON data in 14 | request body. The web framework should translate this exception to a 400 response. 15 | """ 16 | 17 | 18 | class AuthenticationError(Exception): 19 | """ 20 | An exception to represent the case where authentication credentials were either not present or incorrect. The web 21 | framework should translate this exception to a 401 response. 22 | """ 23 | 24 | 25 | class ItemNotFoundError(Exception): 26 | """ 27 | An exception to represent the case where something was not found. Example: trying to get the status for a 28 | non-existent build. The web framework should translate this exception to a 404 response. 29 | """ 30 | 31 | 32 | class PreconditionFailedError(Exception): 33 | """ 34 | An exception to represent the case when a non-authentication-related precondition for accessing the resource 35 | was not met. For example, the session id token has expired. The web framework should translate this exception 36 | to a 412 response. 37 | """ 38 | -------------------------------------------------------------------------------- /app/util/ordered_set_queue.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from queue import Queue 3 | 4 | 5 | class OrderedSetQueue(Queue): 6 | """ 7 | A queue based on ordered set. This behaves just like a normal queue but does not allow the same item to be in the 8 | queue more than once. 9 | """ 10 | def _init(self, maxsize): 11 | self.queue = OrderedSet() 12 | 13 | def _put(self, item): 14 | self.queue.add(item) 15 | 16 | def _get(self): 17 | return self.queue.pop() 18 | 19 | 20 | class OrderedSet(collections.MutableSet): 21 | """ 22 | Set that remembers original insertion order. 
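Membership is tracked in a dict for O(1) lookups, while insertion order is maintained in a doubly linked list of [key, prev, next] cells.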
23 | Code from http://code.activestate.com/recipes/576694/ 24 | """ 25 | 26 | def __init__(self, iterable=None): 27 | self.end = end = [] 28 | end += [None, end, end] # sentinel node for doubly linked list 29 | self.map = {} # key --> [key, prev, next] 30 | if iterable is not None: 31 | self |= iterable 32 | 33 | def __len__(self): 34 | return len(self.map) 35 | 36 | def __contains__(self, key): 37 | return key in self.map 38 | 39 | def add(self, key): 40 | if key not in self.map: 41 | end = self.end 42 | curr = end[1] 43 | curr[2] = end[1] = self.map[key] = [key, curr, end] 44 | 45 | def discard(self, key): 46 | if key in self.map: 47 | key, prev, nxt = self.map.pop(key) 48 | prev[2] = nxt 49 | nxt[1] = prev 50 | 51 | def __iter__(self): 52 | end = self.end 53 | curr = end[2] 54 | while curr is not end: 55 | yield curr[0] 56 | curr = curr[2] 57 | 58 | def __reversed__(self): 59 | end = self.end 60 | curr = end[1] 61 | while curr is not end: 62 | yield curr[0] 63 | curr = curr[1] 64 | 65 | def pop(self, last=True): 66 | if not self: 67 | raise KeyError('set is empty') 68 | key = self.end[1][0] if last else self.end[2][0] 69 | self.discard(key) 70 | return key 71 | 72 | def __repr__(self): 73 | if not self: 74 | return '%s()' % (self.__class__.__name__,) 75 | return '%s(%r)' % (self.__class__.__name__, list(self)) 76 | 77 | def __eq__(self, other): 78 | if isinstance(other, OrderedSet): 79 | return len(self) == len(other) and list(self) == list(other) 80 | return set(self) == set(other) 81 | -------------------------------------------------------------------------------- /app/util/pagination.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | 4 | def get_paginated_indices(offset: Optional[int], limit: Optional[int], length: int) -> Tuple[int, int]: # pylint: disable=invalid-sequence-index 5 | """ 6 | Given an offset and a limit, return the correct starting and ending indices to paginate with that are valid within 7 | a given length of the item being paginated. 8 | :param offset: The offset from the starting item for the request 9 | :param limit: The limit or amount of items for the request 10 | :param length: The length of the list which we are getting indices to paginate 11 | """ 12 | # Either both limit and offset are set set or neither are set, so if one or more isn't set 13 | # then we return the entire list. This usually implies `v1` is being called where we don't paginate at all. 14 | if offset is None or limit is None: 15 | return 0, length 16 | 17 | # Remove any negative values. 18 | offset = max(offset, 0) 19 | limit = max(limit, 0) 20 | 21 | # If limit is set higher than the number of builds, reduce limit. 22 | limit = min(length, limit) 23 | 24 | starting_index = offset 25 | ending_index = min((starting_index + limit), length) 26 | 27 | return starting_index, ending_index 28 | -------------------------------------------------------------------------------- /app/util/poll.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def wait_for(boolean_predicate, timeout_seconds=None, poll_period=0.25, exceptions_to_swallow=None): 5 | """ 6 | Waits a specified amount of time for the conditional predicate to be true. 
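Returns as soon as the predicate evaluates truthy; otherwise it keeps polling every poll_period seconds until the timeout elapses and then returns False.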
7 | 8 | :param boolean_predicate: A callable to continually evaluate until it returns a truthy value 9 | :type boolean_predicate: callable 10 | :param timeout_seconds: The timeout (in seconds) 11 | :type timeout_seconds: int 12 | :param poll_period: The frequency at which boolean_predicate should be evaluated 13 | :type poll_period: float 14 | :param exceptions_to_swallow: A set of acceptable exceptions that may be thrown by boolean_predicate 15 | :type exceptions_to_swallow: Exception | list(Exception) 16 | :return: True if boolean_predicate returned True before the timeout; False otherwise 17 | :rtype: bool 18 | """ 19 | exceptions_to_swallow = exceptions_to_swallow or () 20 | timeout_seconds = timeout_seconds or float('inf') 21 | 22 | end_time = time.time() + timeout_seconds 23 | while time.time() < end_time: 24 | try: 25 | if boolean_predicate(): 26 | return True 27 | except exceptions_to_swallow: 28 | pass 29 | 30 | time.sleep(poll_period) 31 | return False 32 | -------------------------------------------------------------------------------- /app/util/safe_thread.py: -------------------------------------------------------------------------------- 1 | from threading import Thread 2 | 3 | from app.util.unhandled_exception_handler import UnhandledExceptionHandler 4 | 5 | 6 | class SafeThread(Thread): 7 | """ 8 | This class represents an application thread that should not be allowed to raise an exception without also shutting 9 | down the entire application. Any exceptions raised from this thread will be funneled through the unhandled 10 | exception handler. 11 | 12 | Unless we have a specific reason not to, we should use this class everywhere throughout the application instead of 13 | threading.Thread. 14 | """ 15 | def run(self): 16 | unhandled_exception_handler = UnhandledExceptionHandler.singleton() 17 | with unhandled_exception_handler: 18 | super().run() 19 | -------------------------------------------------------------------------------- /app/util/secret.py: -------------------------------------------------------------------------------- 1 | import hmac 2 | 3 | 4 | class Secret: 5 | DIGEST_HEADER_KEY = 'Clusterrunner-Message-Authentication-Digest' 6 | _secret = None 7 | 8 | # Due to Issue #358, it is possible to have autogenerated this insecure secret which should not be used. 9 | _BAD_SECRET = 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce' \ 10 | '47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e' 11 | 12 | @classmethod 13 | def get(cls): 14 | """ 15 | :return: The secret that was set with the set(secret) method 16 | :rtype: str 17 | """ 18 | return cls._secret 19 | 20 | @classmethod 21 | def set(cls, secret): 22 | """ 23 | :param secret: The secret that will be used by the application to authenticate network requests 24 | :type secret: str 25 | """ 26 | if not secret or len(secret) == 0: 27 | raise InsecureSecretError('Empty secret is not allowed!') 28 | if len(secret) < 8: 29 | raise InsecureSecretError('Your secret must be at least 8 characters long!') 30 | if secret == cls._BAD_SECRET: 31 | raise InsecureSecretError('Your current secret is insecure! 
(See ClusterRunner issue #358.)') 32 | cls._secret = secret 33 | 34 | @classmethod 35 | def header(cls, message, secret=None): 36 | """ 37 | Produces a header which contains a digest of a message, generated using the shared secret 38 | :type secret: str 39 | :return: The header to use in authenticated network requests 40 | :rtype: dict 41 | """ 42 | secret = secret or cls.get() 43 | return {cls.DIGEST_HEADER_KEY: cls._get_hex_digest(message, secret)} 44 | 45 | @classmethod 46 | def _get_hex_digest(cls, message, secret): 47 | """ 48 | Create a message authentication digest, using a shared secret and the message 49 | :type message: str 50 | :type secret: str 51 | :return: A 64 character hex string 52 | :rtype: str 53 | """ 54 | hmac_digester = hmac.new(secret.encode('utf-8'), message.encode('utf-8'), digestmod='sha512') 55 | return hmac_digester.hexdigest() 56 | 57 | @classmethod 58 | def digest_is_valid(cls, digest_received, message_received): 59 | """ 60 | Check if a digested message matches the digest passed in. 61 | :param digest_received: The Message Authentication digest the client has passed in 62 | :type digest_received: str 63 | :param message_received: The message the client has passed in 64 | :type message_received: str 65 | :return: Whether the message digest matches the digest passed in (proving the client knows the same secret) 66 | :rtype: bool 67 | """ 68 | digest_received = digest_received or '' 69 | message_digest = cls._get_hex_digest(message_received, cls.get()) 70 | 71 | # hmac.compare_digest protects against timing attacks 72 | if not hmac.compare_digest(digest_received, message_digest): 73 | return False 74 | return True 75 | 76 | 77 | class InsecureSecretError(RuntimeError): 78 | _regen_msg = ' Please generate a new secret and add it to your clusterrunner.conf. If you delete the secret in' \ 79 | ' your current config file, a new one will be generated the next time the service is started.' 80 | 81 | def __init__(self, msg): 82 | super().__init__(msg + self._regen_msg) 83 | -------------------------------------------------------------------------------- /app/util/session_id.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | 4 | class SessionId(object): 5 | EXPECTED_SESSION_HEADER_KEY = 'Expected-Session-Id' 6 | SESSION_HEADER_KEY = 'Session-Id' 7 | 8 | _session_id = None 9 | 10 | @classmethod 11 | def get(cls): 12 | """ 13 | :return: the unique, generated session id string. 
14 | :rtype: str 15 | """ 16 | if cls._session_id is None: 17 | cls._session_id = str(uuid.uuid4()) 18 | return cls._session_id 19 | -------------------------------------------------------------------------------- /app/util/shell/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/util/shell/__init__.py -------------------------------------------------------------------------------- /app/util/shell/local_shell_client.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from subprocess import PIPE, DEVNULL 3 | 4 | from app.util.log import get_logger 5 | from app.util.process_utils import Popen_with_delayed_expansion 6 | from app.util.shell.shell_client import ShellClient, Response, EmptyResponse 7 | 8 | 9 | class LocalShellClient(ShellClient): 10 | def __init__(self, host, user): 11 | super().__init__(host, user) 12 | self._logger = get_logger(__name__) 13 | 14 | def _exec_command_on_client_async(self, command): 15 | """ 16 | :param command: 17 | :return: 18 | :rtype: Response 19 | """ 20 | # todo investigate why this assignment is required for launching async operations using Popen 21 | self._logger.debug('popen async [{}:{}]: {}'.format(self.user, self.host, command)) 22 | Popen_with_delayed_expansion(command, shell=True, stdout=DEVNULL, stderr=DEVNULL) 23 | return EmptyResponse() 24 | 25 | def _exec_command_on_client_blocking(self, command): 26 | """ 27 | :param command: 28 | :type command: str 29 | :return: 30 | :rtype: Response 31 | """ 32 | proc = Popen_with_delayed_expansion(command, shell=True, stdout=PIPE, stderr=PIPE) 33 | self._logger.debug('popen blocking [{}:{}]: {}'.format(self.user, self.host, command)) 34 | output, error = proc.communicate() 35 | return Response(raw_output=output, raw_error=error, returncode=proc.returncode) 36 | 37 | def _copy_on_client(self, source, destination): 38 | """ 39 | :param source: 40 | :type source: str 41 | :param destination: 42 | :type destination: str 43 | :return: 44 | :rtype: Response 45 | """ 46 | new_location = shutil.copy(source, destination) 47 | # todo detect failure and specify returncode and error 48 | return Response(raw_output=new_location.encode(), returncode=0) 49 | -------------------------------------------------------------------------------- /app/util/shell/remote_shell_client.py: -------------------------------------------------------------------------------- 1 | from subprocess import PIPE, DEVNULL 2 | 3 | from app.util.log import get_logger 4 | from app.util.process_utils import Popen_with_delayed_expansion 5 | from app.util.shell.shell_client import ShellClient, Response, EmptyResponse 6 | 7 | 8 | class RemoteShellClient(ShellClient): 9 | def __init__(self, host, user): 10 | super().__init__(host, user) 11 | self._logger = get_logger(__name__) 12 | 13 | def _exec_command_on_client_async(self, command): 14 | """ 15 | :type command: str 16 | :rtype: Response 17 | """ 18 | escaped_command = self._escaped_ssh_command(command) 19 | self._logger.debug('SSH popen async [{}:{}]: {}'.format(self.user, self.host, escaped_command)) 20 | Popen_with_delayed_expansion(escaped_command, shell=True, stdout=DEVNULL, stderr=DEVNULL) 21 | return EmptyResponse() 22 | 23 | def _exec_command_on_client_blocking(self, command): 24 | """ 25 | :type command: str 26 | :rtype: Response 27 | """ 28 | escaped_command = self._escaped_ssh_command(command) 29 | 
self._logger.debug('SSH popen blocking [{}:{}]: {}'.format(self.user, self.host, escaped_command)) 30 | proc = Popen_with_delayed_expansion(escaped_command, shell=True, stdout=PIPE, stderr=PIPE) 31 | output, error = proc.communicate() 32 | return Response(raw_output=output, raw_error=error, returncode=proc.returncode) 33 | 34 | def _copy_on_client(self, source, destination): 35 | """ 36 | :type source: str 37 | :type destination: str 38 | :rtype: Response 39 | """ 40 | # Avoid any ssh known_hosts prompts. 41 | command = 'scp -o StrictHostKeyChecking=no {} {}:{}'.format(source, self._host_string(), destination) 42 | self._logger.debug('SCP popen blocking [{}:{}]: {}'.format(self.user, self.host, command)) 43 | proc = Popen_with_delayed_expansion(command, shell=True, stdout=PIPE, stderr=PIPE) 44 | output, error = proc.communicate() 45 | return Response(raw_output=output, raw_error=error, returncode=proc.returncode) 46 | 47 | def _escaped_ssh_command(self, command): 48 | """ 49 | :param command: the command to execute if it were local 50 | :type command: str 51 | :return: the escaped command wrapped around an ssh call 52 | :rtype: str 53 | """ 54 | escaped_command = command.replace("'", "\'") 55 | # Avoid any ssh known_hosts prompts. 56 | return "ssh -o StrictHostKeyChecking=no {} '{}'".format(self._host_string(), escaped_command) 57 | 58 | def _host_string(self): 59 | """ 60 | Return either the host, or the username@host if the username is specified. 61 | 62 | :rtype: str 63 | """ 64 | return self.host if self.user is None else "{}@{}".format(self.user, self.host) 65 | -------------------------------------------------------------------------------- /app/util/shell/shell_client_factory.py: -------------------------------------------------------------------------------- 1 | from app.util.network import Network 2 | from app.util.shell.local_shell_client import LocalShellClient 3 | from app.util.shell.remote_shell_client import RemoteShellClient 4 | 5 | 6 | class ShellClientFactory(object): 7 | @classmethod 8 | def create(cls, host, user): 9 | if Network.are_hosts_same(host, 'localhost'): 10 | return LocalShellClient(host, user) 11 | else: 12 | return RemoteShellClient(host, user) 13 | -------------------------------------------------------------------------------- /app/util/single_use_coin.py: -------------------------------------------------------------------------------- 1 | from threading import Lock 2 | 3 | 4 | class SingleUseCoin(object): 5 | """ 6 | A SingleUseCoin acts as a thread-safe, one-time flag. For example, this is useful for enforcing that a specific 7 | code path is only traversed exactly once, even across multiple threads. 8 | 9 | The first time spend() is called, it will return True. All subsequent calls to spend() will return False. If many 10 | threads call spend(), it is guaranteed that exactly one will return True. 11 | """ 12 | def __init__(self): 13 | self._is_spent = False 14 | self._spend_lock = Lock() 15 | 16 | def spend(self): 17 | """ 18 | Returns whether or not the coin was spent. The coin can only be spent one time. 
19 | 20 | :return: True the first time that this method is called, False all subsequent calls 21 | :rtype: bool 22 | """ 23 | with self._spend_lock: 24 | if self._is_spent: 25 | return False 26 | 27 | self._is_spent = True 28 | return True 29 | -------------------------------------------------------------------------------- /app/util/singleton.py: -------------------------------------------------------------------------------- 1 | from threading import RLock 2 | 3 | 4 | class Singleton(object): 5 | 6 | _instance_lock = RLock() 7 | _singleton_instance = None 8 | 9 | @classmethod 10 | def singleton(cls): 11 | """ 12 | Get the singleton instance. Create it if it doesn't exist. 13 | """ 14 | with cls._instance_lock: 15 | if cls._singleton_instance is None: 16 | cls._singleton_instance = cls() 17 | return cls._singleton_instance 18 | 19 | @classmethod 20 | def reset_singleton(cls): 21 | """ 22 | Reset the singleton instance. 23 | """ 24 | with cls._instance_lock: 25 | if cls._singleton_instance is not None: 26 | del cls._singleton_instance 27 | cls._singleton_instance = None 28 | 29 | def __init__(self): 30 | """ 31 | Raise an error if we attempt to instantiate multiple instances. 32 | 33 | Note that we *could* make every instantiation return the same instance -- Python allows this -- but have chosen 34 | not to. This is because we do not want client code to be ignorant of the fact that this object is a singleton. 35 | """ 36 | with self._instance_lock: 37 | if self._singleton_instance is not None: 38 | raise SingletonError('Cannot instantiate singleton more than once. Use the singleton() class method.') 39 | 40 | 41 | class SingletonError(Exception): 42 | """ 43 | Raised when a singleton has been misused (e.g., instantiated more than once.) 44 | """ 45 | -------------------------------------------------------------------------------- /app/util/url_builder.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib.parse import urljoin 3 | from app.util.conf.configuration import Configuration 4 | 5 | 6 | class UrlBuilder(object): 7 | """ 8 | Stores the host, port, scheme, and api version information for our URLs and centralizes their generation 9 | """ 10 | API_VERSION_1 = 'v1' 11 | 12 | def __init__(self, service_address, api_version=API_VERSION_1): 13 | """ 14 | :param service_address: A host and port and optional scheme, like "http(s)://hostname.example.com:43000" 15 | :type service_address: str 16 | :type api_version: str 17 | """ 18 | self._service_address = service_address 19 | self._api_version = api_version 20 | self._scheme = '{}://'.format(Configuration['protocol_scheme']) 21 | 22 | def url(self, *args): 23 | """ 24 | Produces a url given a set of paths 25 | :param args: A list of args to string together into a url path 26 | :type args: iterable [str|int] 27 | :rtype: str 28 | """ 29 | schemed_address = self._scheme + re.sub(r'^[a-z]+://', '', self._service_address) 30 | versioned_url = urljoin(schemed_address, self._api_version) 31 | return '/'.join([versioned_url] + [str(arg).strip('/') for arg in args]) 32 | -------------------------------------------------------------------------------- /app/util/util.py: -------------------------------------------------------------------------------- 1 | from app.project_type.directory import Directory 2 | from app.project_type.git import Git 3 | 4 | 5 | _subclasses_by_name = { 6 | 'directory': Directory, 7 | 'git': Git, 8 | } 9 | 10 | 11 | def project_type_subclasses_by_name(): 12 | """ 13 | 
Return a mapping from project_type name to class. 14 | 15 | Note: This function cannot be placed in project_type.py because it would cause circular imports. 16 | 17 | :return: The ProjectType subclasses by type name 18 | :rtype: dict[str, type] 19 | """ 20 | return _subclasses_by_name.copy() # copy to prevent unintended modification of original 21 | 22 | 23 | def get_project_type_subclass(project_type_name): 24 | """ 25 | Given the name of a ProjectType subclass, return the class itself. 26 | 27 | Note: This function cannot be placed in project_type.py because it would cause circular imports. 28 | 29 | :param project_type_name: The name of a subclass of ProjectType (e.g., 'directory' or 'git') 30 | :type project_type_name: str 31 | :return: The ProjectType subclass corresponding to the specified type name, or None if no matching name found 32 | :rtype: type|None 33 | """ 34 | return project_type_subclasses_by_name().get(project_type_name.lower()) 35 | 36 | 37 | def create_project_type(project_type_params): 38 | """ 39 | :param project_type_params: The parameters for creating a ProjectType instance -- the dict should include the 40 | 'type' key, which specifies the ProjectType subclass name, and key/value pairs matching constructor arguments 41 | for that ProjectType subclass. 42 | :type project_type_params: dict 43 | :return: The project_type instance 44 | :rtype: project_type.project_type.ProjectType 45 | """ 46 | project_type_params = project_type_params.copy() 47 | project_type_name = project_type_params.pop('type') 48 | project_type_class = get_project_type_subclass(project_type_name) 49 | if project_type_class: 50 | return project_type_class(**project_type_params) # create object using project_type_params as constructor args 51 | 52 | # Other project types are not yet implemented 53 | return None 54 | -------------------------------------------------------------------------------- /app/web_framework/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/app/web_framework/__init__.py -------------------------------------------------------------------------------- /app/web_framework/api_version_handler.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Optional 3 | 4 | 5 | class APIVersionHandler: 6 | OLD_VERSIONED_URI_COMPONENT = 'v1' 7 | API_VERSION_HEADER_KEY = 'Api-Version' 8 | 9 | _versions = [ 10 | 1, 11 | 2 12 | ] 13 | 14 | @classmethod 15 | def resolve_version(cls, accept_header_value: Optional[str], uri: str) -> int: 16 | """ 17 | Get the respective version of the API relative to the request header and URI. 18 | :param accept_header_value: The value of the header in which to search for the version type (Content-Type/Accept). 19 | :param uri: The URI from the request being checked. 20 | """ 21 | if accept_header_value is None: 22 | return cls._get_default(uri) 23 | 24 | version = cls._get_default(uri) 25 | try: 26 | matches = re.search(r'(?:application/vnd.clusterrunner.v(\d+)\+json)', 27 | accept_header_value, re.IGNORECASE) 28 | matched_version = int(matches.group(1)) 29 | version = matched_version if matched_version in cls._versions else cls._get_default(uri) 30 | except (IndexError, AttributeError, ValueError): 31 | # No version was found or specified in the request header.
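# Fall back to the default version already resolved above for this URI.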
32 | pass 33 | 34 | return version 35 | 36 | @classmethod 37 | def get_first(cls) -> int: 38 | """ 39 | Return the first version of the API. This assumes that the first version is the 40 | version with the lowest value. 41 | """ 42 | return min(cls._versions) 43 | 44 | @classmethod 45 | def get_latest(cls) -> int: 46 | """ 47 | Return the latest version of the API. This assumes that the latest version is the 48 | version with the highest value. 49 | """ 50 | return max(cls._versions) 51 | 52 | @classmethod 53 | def _get_default(cls, uri: str) -> int: 54 | """ 55 | Get the default version of the API if none was specified. This takes into account 56 | the URI of the request. 57 | :param uri: The URI from the request being checked. 58 | """ 59 | first_uri_component = next((part for part in uri.split('/') if part != ''), None) 60 | if first_uri_component == cls.OLD_VERSIONED_URI_COMPONENT: 61 | return cls.get_first() 62 | else: 63 | return cls.get_latest() 64 | -------------------------------------------------------------------------------- /app/web_framework/cluster_application.py: -------------------------------------------------------------------------------- 1 | import tornado.web 2 | 3 | 4 | class ClusterApplication(tornado.web.Application): 5 | 6 | @staticmethod 7 | def get_all_handlers(root_route, default_params): 8 | """ 9 | Follows a route's descendants to return all routes in a form accepted as 'handlers' by Tornado. 10 | :param root_route: The base RouteNode 11 | :type root_route: web_framework.RouteNode 12 | :param default_params: The params to pass to the Tornado handler 13 | :type default_params: dict 14 | :return: Tornado handler tuples 15 | :rtype: list [tuple (str, tornado.web.RequestHandler, dict)] 16 | """ 17 | all_route_nodes = [root_route] + root_route.descendants() 18 | # Tornado handlers take the form of a tuple(regex, handler_class, parameters). The parameters start with 19 | # the common defaults provided and we append the RouteNode we are associating each handler with 20 | return [(route.regex(), route.handler, dict(default_params, route_node=route)) for route in all_route_nodes] 21 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | install: 2 | - "SET PATH=C:\\Python34;C:\\Python34\\Scripts\\;%PATH%" 3 | - pip install -r requirements.txt 4 | - pip install -r dev-requirements.txt 5 | 6 | environment: 7 | CR_VERBOSE: 1 8 | 9 | build: false # Not a C# project 10 | 11 | test_script: 12 | - nosetests -vv test 13 | - windows\run_cr_unit_on_cr.cmd 14 | -------------------------------------------------------------------------------- /bin/git_askpass.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script is just a no-op dummy script that outputs nothing. It can be used as the target of $GIT_ASKPASS so that 4 | # all git prompts will be automatically filled with an invalid empty response, causing all prompts to fail. 5 | -------------------------------------------------------------------------------- /bin/git_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # A pass-through wrapper script around ssh that allows setting additional arguments via environment variable. It can be 4 | # used as the target of $GIT_SSH to enable setting git's ssh options in a script.
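# Example usage (hypothetical): GIT_SSH_ARGS='-o ConnectTimeout=10' GIT_SSH=bin/git_ssh.sh git fetch origin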
5 | 6 | GIT_SSH_ARGS=${GIT_SSH_ARGS:-""} # default to "" (no injected args) 7 | ssh ${GIT_SSH_ARGS} $@ 8 | -------------------------------------------------------------------------------- /clusterrunner.yaml: -------------------------------------------------------------------------------- 1 | Unit: 2 | commands: 3 | - nosetests --with-xunit --xunit-file $ARTIFACT_DIR/result.xml $TESTPATH 4 | atomizers: 5 | - TESTPATH: find "test/unit" -name "test_*.py" 6 | 7 | UnitOnWindows: 8 | commands: 9 | - nosetests --with-xunit --xunit-file !ARTIFACT_DIR!\result.xml !TESTPATH! 10 | atomizers: 11 | - TESTPATH: cd test\unit && dir test_*.py /b/s 12 | -------------------------------------------------------------------------------- /conf/default_clusterrunner.conf: -------------------------------------------------------------------------------- 1 | ## This file contains the default values for common configuration settings. ClusterRunner expects a 2 | ## clusterrunner.conf file in the user's ~/.clusterrunner directory (or a location specified on the command line), 3 | ## and it will copy this file to ~/.clusterrunner/clusterrunner.conf if that doesn't exist. Uncomment settings to 4 | ## change their defaults. Settings which are specified via the command line (e.g. clusterrunner master --port 43777) 5 | ## will override these values. 6 | 7 | [general] 8 | ## The root directory for files used during the build process. 9 | # base_directory = ~/.clusterrunner 10 | 11 | ## Symlinks to each build's project files are created here, to keep paths consistent across machines. 12 | # build_symlink_directory = /tmp/clusterrunner_build_symlinks 13 | 14 | ## The level to log at. Other options are DEBUG, INFO, NOTICE, WARNING, ERROR, and CRITICAL. 15 | # log_level = 'WARNING' 16 | 17 | ## A list of slaves, used for starting slaves with the "clusterrunner deploy" command 18 | # slaves = hostname01.example.com, hostname02.example.com, hostname03.example.com 19 | 20 | ## The hostname to refer to the local machine with 21 | # hostname = localhost 22 | 23 | ## Should we automatically reject all git remote operations on hosts that are not in known_hosts? 24 | # git_strict_host_key_checking = False 25 | 26 | ## CORS support - a regex to match against allowed API request origins, or None to disable CORS 27 | # cors_allowed_origins_regex = None 28 | 29 | ## Should the slaves get the project from master or not 30 | # get_project_from_master = True 31 | 32 | ## Secret string used for authenticating requests on master and slaves. By default, a secret is 33 | ## randomly generated on startup and saved. If the master and slave(s) are on different hosts, this 34 | ## value must be set to "None" to disable authentication, or to a common value for all hosts.
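## Hypothetical example: secret = some-long-random-string-shared-by-all-hosts (the application rejects secrets shorter than 8 characters)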
35 | # secret = None 36 | 37 | ## HTTPS certificate and key file locations 38 | ## If the certificate and/or key file is not provided, ClusterRunner will run using the HTTP protocol 39 | # https_cert_file = None 40 | # https_key_file = None 41 | 42 | [master] 43 | ## The port the master service will run on 44 | # port = 43000 45 | 46 | ## Interval after which the master runs a periodic cleanup to disconnect slaves that are not sending heartbeats 47 | # unresponsive_slaves_cleanup_interval = 600 48 | 49 | [slave] 50 | ## The port the slave service will run on 51 | # port = 43001 52 | 53 | ## The maximum number of parallel executions to run on this slave 54 | # num_executors = 1 55 | 56 | ## The master's hostname this slave will connect to 57 | # master_hostname = localhost 58 | 59 | ## The master's port this slave will connect to 60 | # master_port = 43000 61 | 62 | ## Configuration for the heartbeat feature 63 | 64 | ## Interval between two heartbeats sent from a slave 65 | # heartbeat_interval = 60 66 | 67 | ## Number of heartbeat failures after which a slave determines the master is unreachable 68 | # heartbeat_failure_threshold = 10 69 | -------------------------------------------------------------------------------- /dev-requirements.in: -------------------------------------------------------------------------------- 1 | coverage 2 | genty 3 | hypothesis 4 | nose 5 | nosexcover 6 | pep8 7 | pex 8 | pylint 9 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile --output-file dev-requirements.txt dev-requirements.in 6 | # 7 | 8 | astroid==1.4.9 # via pylint 9 | coverage==3.7.1 10 | genty==1.1.0 11 | hypothesis==1.9.0 12 | isort==4.3.4 # via pylint 13 | lazy-object-proxy==1.3.1 # via astroid 14 | mccabe==0.6.1 # via pylint 15 | nose==1.3.4 16 | nosexcover==1.0.10 17 | pep8==1.5.7 18 | pex==1.3.1 19 | pylint==1.6.5 20 | six==1.11.0 # via astroid, genty, pylint 21 | wheel==0.29.0 # via pex 22 | wrapt==1.10.11 # via astroid 23 | -------------------------------------------------------------------------------- /examples/directory job/clusterrunner.yaml: -------------------------------------------------------------------------------- 1 | SampleJob: 2 | max_executors: 2 3 | max_executors_per_slave: 5 4 | setup_build: 5 | - echo "run setup" 6 | commands: 7 | - sleep 1 8 | - echo "Here is the artifact for $MY_MESSAGE."
> $ARTIFACT_DIR/result.txt 9 | atomizers: 10 | - MY_MESSAGE: printf 'atom number %d\n' {1..10} 11 | teardown_build: 12 | - echo "run teardown" 13 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | configobj 2 | fysom 3 | logbook 4 | prometheus_client 5 | psutil 6 | pyyaml 7 | requests 8 | termcolor 9 | tornado 10 | typing 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile --output-file requirements.txt requirements.in 6 | # 7 | 8 | configobj==5.0.6 9 | fysom==2.1.2 10 | logbook==0.7.0 11 | prometheus-client==0.0.19 12 | psutil==2.2.0 13 | pyyaml==3.11 14 | requests==2.3.0 15 | six==1.11.0 # via configobj 16 | termcolor==1.1.0 17 | tornado==3.2.2 18 | typing==3.6.1 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | try: 4 | from pip.req import parse_requirements # pip 9.x 5 | except ImportError: 6 | from pip._internal.req import parse_requirements # pip 10.x 7 | from setuptools import find_packages, setup 8 | 9 | from app.util import autoversioning 10 | 11 | version = autoversioning.get_version() 12 | 13 | # bdist_pex runs in a temp dir, therefore requirements.txt must be added to data_files. 14 | requirements = [str(r.req) for r in parse_requirements('requirements.txt', session=False)] 15 | 16 | name = 'clusterrunner' 17 | 18 | setup( 19 | name=name, 20 | version=version, 21 | description="ClusterRunner makes it easy to execute test suites across your " 22 | "infrastructure in the fastest and most efficient way possible.", 23 | maintainer="Box", 24 | maintainer_email="productivity@box.com", 25 | url="https://github.com/box/ClusterRunner", 26 | license="ASL 2.0", 27 | 28 | python_requires='>=3.4', 29 | packages=find_packages(exclude=('test', 'test.*')), 30 | # Data files are packaged into the wheel using the following defines. 31 | data_files=[ 32 | ('', ['requirements.txt']), 33 | ('conf', ['conf/default_clusterrunner.conf']), 34 | ], 35 | install_requires=requirements, 36 | entry_points={ 37 | 'console_scripts': ['{} = app.__main__:main'.format(name)], 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | Quick setup for OS X 2 | ----------- 3 | ```bash 4 | # Clone repo 5 | git clone https://github.com/box/ClusterRunner.git 6 | cd ClusterRunner 7 | 8 | # Create a Python 3.4 virtualenv using your preferred method. 9 | # See below for steps on doing this via Pyenv. 10 | 11 | # Install ClusterRunner dependencies 12 | make init-dev 13 | ``` 14 | 15 | 16 | Run tests 17 | -------------- 18 | ```bash 19 | make test 20 | # or... 21 | nosetests test/unit/ 22 | nosetests test/functional/ 23 | 24 | # or run the functional tests with verbose logging 25 | export CR_VERBOSE=1 26 | nosetests -s -v test/functional/ 27 | ``` 28 | 29 | Run lint & tests in docker (no need for any setup on local machine) 30 | -------------- 31 | ```bash 32 | make docker-lint 33 | make docker-test 34 | # or... 
35 | docker build --target builder -t productivity/clusterrunner-tests -f Dockerfile . 36 | docker run --rm productivity/clusterrunner-tests make lint 37 | docker run --rm productivity/clusterrunner-tests make test-unit 38 | docker run --rm productivity/clusterrunner-tests make test-integration 39 | docker run --rm productivity/clusterrunner-tests make test-functional 40 | 41 | # or run the functional tests with verbose logging 42 | docker build --target builder -t productivity/clusterrunner-tests -f Dockerfile . 43 | docker run -e CR_VERBOSE=1 --rm productivity/clusterrunner-tests nosetests -s -v test/functional/ 44 | ``` 45 | 46 | 47 | Set up Python 3.4 using Pyenv 48 | --------------- 49 | This is the preferred method since installing Python 3.4 via Homebrew is no longer easy. 50 | ```bash 51 | # Install pyenv (Instructions from https://github.com/pyenv/pyenv#installation) 52 | brew update 53 | brew install pyenv 54 | 55 | # Add pyenv init to your shell startup file 56 | echo 'eval "$(pyenv init -)"' >> ~/.bash_profile # replace .bash_profile with whatever you use (.bashrc, .profile, etc.) 57 | 58 | # Install Python 3.4 59 | pyenv install 3.4.8 # use latest 3.4.X 60 | 61 | # Use pyenv-virtualenv to manage venvs (https://github.com/pyenv/pyenv-virtualenv) 62 | brew install pyenv-virtualenv 63 | echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bash_profile # replace .bash_profile with whatever you use (.bashrc, .profile, etc.) 64 | 65 | # Create a virtualenv for ClusterRunner 66 | cd ClusterRunner 67 | pyenv virtualenv 3.4.8 cr 68 | pyenv local cr # auto-activate virtualenv when entering this directory 69 | make init-dev 70 | ``` 71 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/__init__.py -------------------------------------------------------------------------------- /test/framework/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/framework/__init__.py -------------------------------------------------------------------------------- /test/framework/base_integration_test_case.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | class BaseIntegrationTestCase(TestCase): 5 | pass 6 | -------------------------------------------------------------------------------- /test/framework/comparators.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | class AnyStringMatching(object): 5 | """ 6 | A helper object that compares equal to any string matching the specified pattern. 7 | """ 8 | def __init__(self, regex_pattern): 9 | self._matcher = re.compile(regex_pattern) 10 | 11 | def __eq__(self, other): 12 | match = self._matcher.search(str(other)) 13 | return isinstance(other, str) and match is not None 14 | 15 | def __repr__(self): 16 | return '<AnyStringMatching: {}>'.format(self._matcher.pattern) 17 | 18 | 19 | class AnythingOfType(object): 20 | """ 21 | A helper object that compares equal to any object of the specified type.
22 | """ 23 | def __init__(self, accepted_type): 24 | self._type = accepted_type 25 | 26 | def __eq__(self, other): 27 | return isinstance(other, self._type) 28 | 29 | def __repr__(self): 30 | return '<AnythingOfType: {}>'.format(self._type) 31 | -------------------------------------------------------------------------------- /test/framework/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/framework/functional/__init__.py -------------------------------------------------------------------------------- /test/framework/pylint/__init__.py: -------------------------------------------------------------------------------- 1 | from test.framework.pylint.clusterrunner_token_checker import ClusterRunnerTokenChecker 2 | 3 | 4 | def register(linter): 5 | """ 6 | Register custom lint checkers with pylint. Any new checkers should also be registered here. 7 | """ 8 | linter.register_checker(ClusterRunnerTokenChecker(linter)) 9 | -------------------------------------------------------------------------------- /test/framework/pylint/clusterrunner_token_checker.py: -------------------------------------------------------------------------------- 1 | import tokenize 2 | 3 | from pylint.checkers import BaseChecker 4 | from pylint.interfaces import ITokenChecker 5 | 6 | 7 | class ClusterRunnerTokenChecker(BaseChecker): 8 | """ 9 | Contains token-based lint checkers for ClusterRunner. 10 | This checker should be registered in its package's __init__.py. 11 | """ 12 | __implements__ = ITokenChecker 13 | 14 | name = 'clusterrunner_token_checker' 15 | msgs = { 16 | 'CR001': ( 17 | 'Comment contains disallowed string "DO NOT COMMIT"', 18 | 'clusterrunner-do-not-commit', 19 | 'Used when a comment contains the string "DO NOT COMMIT". ' 20 | 'These are generated files that should not be committed.'), 21 | } 22 | 23 | def process_tokens(self, tokens): 24 | """ 25 | Superclass override: receives a generator object containing all the tokens in the current file being linted. 26 | 27 | :type tokens: collections.Iterable[tokenize.TokenInfo] 28 | """ 29 | for token in tokens: 30 | self._check_token_for_do_not_commit_string(token) 31 | 32 | def _check_token_for_do_not_commit_string(self, token): 33 | """ 34 | Check for a "DO NOT COMMIT" string in comments. This comment is used in autogenerated files (e.g., 35 | package_version.py) that should not be committed.
36 | 37 | :type token: tokenize.TokenInfo 38 | """ 39 | if token.type == tokenize.COMMENT and 'DO NOT COMMIT' in token.string: 40 | line_number = token.start[0] 41 | self.add_message('clusterrunner-do-not-commit', line=line_number) 42 | -------------------------------------------------------------------------------- /test/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/functional/__init__.py -------------------------------------------------------------------------------- /test/functional/heartbeat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/functional/heartbeat/__init__.py -------------------------------------------------------------------------------- /test/functional/heartbeat/test_heartbeat.py: -------------------------------------------------------------------------------- 1 | from requests.exceptions import ConnectionError 2 | from unittest import skipIf 3 | 4 | from app.util.process_utils import is_windows 5 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 6 | 7 | @skipIf(is_windows(), 'Fails on AppVeyor; see issue #345') 8 | class TestHeartbeat(BaseFunctionalTestCase): 9 | def test_slave_failure_should_mark_slave_offline(self): 10 | master = self.cluster.start_master(unresponsive_slaves_cleanup_interval=5) 11 | slaves = self.cluster.start_slave(num_executors_per_slave=1, start_port=43001, heartbeat_interval=5, 12 | heartbeat_failure_threshold=1) 13 | 14 | # verify that the slave is connected 15 | self.assertEqual(True, master.get_slave_status(1).get('is_alive')) 16 | 17 | # kill the slave in non graceful manner and verify that master still thinks it is connected 18 | self.cluster.kill_slaves(kill_gracefully=False) 19 | self.cluster.block_until_n_slaves_dead(1,5) 20 | self.assertEqual(True, master.get_slave_status(1).get('is_alive')) 21 | 22 | # wait for the next heartbeat run which marks the slave offline 23 | self.cluster.block_until_n_slaves_marked_dead_in_master(1,10) 24 | self.assertEqual(False, master.get_slave_status(1).get('is_alive')) 25 | 26 | def test_master_failure_should_kill_the_slave_process(self): 27 | master = self.cluster.start_master(unresponsive_slaves_cleanup_interval=5) 28 | slave = self.cluster.start_slave(num_executors_per_slave=1, start_port=43001, heartbeat_interval=5, 29 | heartbeat_failure_threshold=1) 30 | # verify that the slave is connected 31 | self.assertEqual(True,slave.get_slave_status()['slave']['is_alive']) 32 | 33 | # kill the master and verify that the slave dies after heartbeat failure 34 | self.cluster.kill_master() 35 | self.cluster.block_until_n_slaves_dead(1,40) 36 | self.assertRaises(ConnectionError, slave.get_slave_status) 37 | 38 | -------------------------------------------------------------------------------- /test/functional/master/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/functional/master/__init__.py -------------------------------------------------------------------------------- /test/functional/master/test_api_version_requests.py: -------------------------------------------------------------------------------- 1 | from genty import genty, 
genty_dataset 2 | 3 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 4 | 5 | @genty 6 | class TestMasterAPIVersionRequests(BaseFunctionalTestCase): 7 | 8 | def _build_accept_header_with_api_version(self, version: int): 9 | header = 'Accept' 10 | value = 'application/vnd.clusterrunner.v{}+json'.format(version) 11 | return {header: value} 12 | 13 | @genty_dataset( 14 | no_accept_header=(None, 1), 15 | v1_accept_header=(1, 1), 16 | v2_accept_header=(2, 2), 17 | invalid_version_accept_header=(999, 1), 18 | ) 19 | def test_api_version_with_accept_header(self, version: int, exp_version: int): 20 | master = self.cluster.start_master() 21 | version_url = master._api.url('version') 22 | header = self._build_accept_header_with_api_version(version) if version else None 23 | resp = master._network.get(version_url, headers=header).json() 24 | self.assertEqual(resp['api_version'], exp_version) 25 | -------------------------------------------------------------------------------- /test/functional/master/test_console_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import yaml 5 | 6 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 7 | from test.functional.job_configs import JOB_WITH_SETUP_AND_TEARDOWN 8 | 9 | 10 | class TestConsoleOutput(BaseFunctionalTestCase): 11 | 12 | def setUp(self): 13 | super().setUp() 14 | self.project_dir = tempfile.TemporaryDirectory() 15 | 16 | def test_logs_are_still_available_after_slave_goes_offline(self): 17 | master = self.cluster.start_master() 18 | self.cluster.start_slave() 19 | build_resp = master.post_new_build({ 20 | 'type': 'directory', 21 | 'config': yaml.safe_load(JOB_WITH_SETUP_AND_TEARDOWN.config[os.name])['JobWithSetupAndTeardown'], 22 | 'project_directory': self.project_dir.name, 23 | }) 24 | build_id = build_resp['build_id'] 25 | self.assertTrue(master.block_until_build_finished(build_id, timeout=30), 26 | 'The build should finish building within the timeout.') 27 | self.assert_build_has_successful_status(build_id) 28 | 29 | # Bring down the single slave and assert that console output for the build is still available. 30 | self.cluster.kill_slaves() 31 | 32 | console_output_1 = master.get_console_output(build_id=build_id, subjob_id=0, atom_id=0) 33 | self.assertEqual( 34 | console_output_1['content'].strip(), 35 | 'Doing subjob 1.' 36 | ) 37 | console_output_2 = master.get_console_output(build_id=build_id, subjob_id=1, atom_id=0) 38 | self.assertEqual( 39 | console_output_2['content'].strip(), 40 | 'Doing subjob 2.' 41 | ) 42 | -------------------------------------------------------------------------------- /test/functional/master/test_deallocation_and_allocation_of_slaves_mid_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from unittest import skipIf 4 | import yaml 5 | 6 | from app.util.process_utils import is_windows 7 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 8 | from test.functional.job_configs import JOB_WITH_SLEEPS 9 | 10 | 11 | @skipIf(is_windows(), 'Fails on AppVeyor; see issue #345') 12 | class TestDeallocationAndAllocationOfSlavesMidBuild(BaseFunctionalTestCase): 13 | def test_build_completes_after_allocating_deallocating_and_reallocating_slaves_to_build(self): 14 | master = self.cluster.start_master() 15 | # Only one slave, with one executor. 
This means that the slave should be able to 16 | # theoretically finish the build in 5 seconds, as this job definition has 5 atoms, 17 | # with each sleeping for 1 second. 18 | self.cluster.start_slaves(1, num_executors_per_slave=1, start_port=43001) 19 | project_dir = tempfile.TemporaryDirectory() 20 | build_resp = master.post_new_build({ 21 | 'type': 'directory', 22 | 'config': yaml.safe_load(JOB_WITH_SLEEPS.config[os.name])['BasicSleepingJob'], 23 | 'project_directory': project_dir.name, 24 | }) 25 | build_id = build_resp['build_id'] 26 | self.assertTrue(master.block_until_build_started(build_id, timeout=30), 27 | 'The build should start building within the timeout.') 28 | master.graceful_shutdown_slaves_by_id([1]) 29 | self.cluster.block_until_n_slaves_dead(num_slaves=1, timeout=10) 30 | self.cluster.kill_slaves(kill_gracefully=False) 31 | self.assert_build_status_contains_expected_data(build_id, {'status': 'BUILDING'}) 32 | self.cluster.start_slaves(1, num_executors_per_slave=1, start_port=43001) 33 | self.assertTrue(master.block_until_build_finished(build_id, timeout=30), 34 | 'The build should finish building within the timeout.') 35 | self.assert_build_has_successful_status(build_id) 36 | -------------------------------------------------------------------------------- /test/functional/master/test_endpoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import yaml 5 | 6 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 7 | from test.functional.job_configs import BASIC_JOB 8 | 9 | 10 | class TestMasterEndpoints(BaseFunctionalTestCase): 11 | 12 | def setUp(self): 13 | super().setUp() 14 | self._project_dir = tempfile.TemporaryDirectory() 15 | 16 | def _start_master_only_and_post_a_new_job(self): 17 | master = self.cluster.start_master() 18 | build_resp = master.post_new_build({ 19 | 'type': 'directory', 20 | 'config': yaml.safe_load(BASIC_JOB.config[os.name])['BasicJob'], 21 | 'project_directory': self._project_dir.name, 22 | }) 23 | build_id = build_resp['build_id'] 24 | return master, build_id 25 | 26 | def test_cancel_build(self): 27 | master, build_id = self._start_master_only_and_post_a_new_job() 28 | 29 | master.cancel_build(build_id) 30 | self.assertTrue(master.block_until_build_finished(build_id, timeout=30), 31 | 'The build should finish building within the timeout.') 32 | 33 | self.assert_build_has_canceled_status(build_id=build_id) 34 | 35 | def test_get_artifact_before_it_is_ready(self): 36 | master, build_id = self._start_master_only_and_post_a_new_job() 37 | 38 | # Since we didn't start any slaves so the artifacts is actually not ready. 
39 | _, status_code = master.get_build_artifacts(build_id) 40 | self.assertEqual(status_code, 202) 41 | 42 | # Cancel the started build just to speed up teardown (avoid teardown timeout waiting for empty queue) 43 | master.cancel_build(build_id) 44 | -------------------------------------------------------------------------------- /test/functional/master/test_shutdown.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import yaml 4 | 5 | from test.framework.functional.base_functional_test_case import BaseFunctionalTestCase 6 | from test.functional.job_configs import JOB_WITH_SETUP_AND_TEARDOWN 7 | 8 | 9 | class TestShutdown(BaseFunctionalTestCase): 10 | 11 | def test_shutdown_all_slaves_should_kill_and_remove_all_slaves(self): 12 | master = self.cluster.start_master() 13 | self.cluster.start_slaves(2) 14 | 15 | master.graceful_shutdown_all_slaves() 16 | 17 | slaves_response = master.get_slaves() 18 | slaves = slaves_response['slaves'] 19 | 20 | self.assertEqual(0, len(slaves)) 21 | 22 | self.cluster.block_until_n_slaves_dead(2, 10) 23 | 24 | def test_shutdown_one_slave_should_leave_one_slave_alive_and_remove_shutdowned_slave(self): 25 | master = self.cluster.start_master() 26 | self.cluster.start_slaves(2) 27 | 28 | master.graceful_shutdown_slaves_by_id([1]) 29 | 30 | slaves_response = master.get_slaves() 31 | slaves = slaves_response['slaves'] 32 | living_slaves = [slave for slave in slaves if slave['is_alive']] 33 | 34 | self.assertEqual(1, len(living_slaves)) 35 | self.assertEqual(1, len(slaves)) 36 | 37 | self.cluster.block_until_n_slaves_dead(1, 10) 38 | 39 | def test_shutdown_all_slaves_while_build_is_running_should_finish_build_then_kill_and_remove_slaves(self): 40 | master = self.cluster.start_master() 41 | self.cluster.start_slaves(2) 42 | 43 | project_dir = tempfile.TemporaryDirectory() 44 | build_resp = master.post_new_build({ 45 | 'type': 'directory', 46 | 'config': yaml.safe_load(JOB_WITH_SETUP_AND_TEARDOWN.config[os.name])['JobWithSetupAndTeardown'], 47 | 'project_directory': project_dir.name, 48 | }) 49 | build_id = build_resp['build_id'] 50 | self.assertTrue(master.block_until_build_started(build_id, timeout=30), 51 | 'The build should start building within the timeout.') 52 | 53 | # Shutdown one on the slaves and test if the build can still complete 54 | master.graceful_shutdown_slaves_by_id([1]) 55 | 56 | self.assertTrue(master.block_until_build_finished(build_id, timeout=30), 57 | 'The build should finish building within the timeout.') 58 | self.assert_build_has_successful_status(build_id=build_id) 59 | 60 | slaves_response = master.get_slaves() 61 | slaves = slaves_response['slaves'] 62 | living_slaves = [slave for slave in slaves if slave['is_alive']] 63 | 64 | self.assertEqual(1, len(living_slaves)) 65 | self.assertEqual(1, len(slaves)) 66 | -------------------------------------------------------------------------------- /test/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/integration/__init__.py -------------------------------------------------------------------------------- /test/integration/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/integration/common/__init__.py 
-------------------------------------------------------------------------------- /test/integration/common/test_build_artifact.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from tempfile import mkstemp, TemporaryDirectory 4 | 5 | from genty import genty, genty_dataset 6 | 7 | from app.common.build_artifact import BuildArtifact 8 | from app.util import fs 9 | from test.framework.base_integration_test_case import BaseIntegrationTestCase 10 | 11 | 12 | @genty 13 | class TestBuildArtifact(BaseIntegrationTestCase): 14 | @classmethod 15 | def setUpClass(cls): 16 | # For timing file test 17 | cls._timing_file_fd, cls._timing_file_path = mkstemp() 18 | 19 | # For parsing subjob/atom ids from build artifact test. 20 | cls._artifact_directory_path = TemporaryDirectory().name 21 | fs.write_file('0', os.path.join(cls._artifact_directory_path, 'artifact_1_0', 'clusterrunner_exit_code')) 22 | fs.write_file('1', os.path.join(cls._artifact_directory_path, 'artifact_1_1', 'clusterrunner_exit_code')) 23 | fs.write_file('0', os.path.join(cls._artifact_directory_path, 'artifact_2_0', 'clusterrunner_exit_code')) 24 | fs.write_file('1', os.path.join(cls._artifact_directory_path, 'artifact_2_1', 'clusterrunner_exit_code')) 25 | 26 | @classmethod 27 | def tearDownClass(cls): 28 | os.close(cls._timing_file_fd) 29 | os.remove(cls._timing_file_path) 30 | 31 | @genty_dataset( 32 | mutually_exclusive=({'1': 1, '2': 2}, {'3': 3}, {'1': 1, '2': 2, '3': 3}), 33 | entire_overlap=({'1': 1, '2': 2}, {'1': 3, '2': 4}, {'1': 3, '2': 4}), 34 | some_overlap=({'1': 1, '2': 2}, {'2': 4, '3': 5}, {'1': 1, '2': 4, '3': 5}), 35 | ) 36 | def test_update_timing_file(self, existing_timing_data, new_timing_data, expected_final_timing_data): 37 | fs.write_file(json.dumps(existing_timing_data), self._timing_file_path) 38 | build_artifact = BuildArtifact('/some/dir/doesnt/matter') 39 | build_artifact._update_timing_file(self._timing_file_path, new_timing_data) 40 | 41 | with open(self._timing_file_path, 'r') as timing_file: 42 | updated_timing_data = json.load(timing_file) 43 | 44 | self.assertDictEqual(updated_timing_data, expected_final_timing_data) 45 | 46 | def test_get_failed_subjob_and_atom_ids_returns_correct_ids(self): 47 | # Build artifact directory: 48 | # artifact_1_0/clusterrunner_exit_code -> 0 49 | # artifact_1_1/clusterrunner_exit_code -> 1 50 | # artifact_2_0/clusterrunner_exit_code -> 0 51 | # artifact_2_1/clusterrunner_exit_code -> 1 52 | # Expected to return: [(1,1), (2,1)] 53 | build_artifact = BuildArtifact(self._artifact_directory_path) 54 | failed_subjob_and_atoms = build_artifact.get_failed_subjob_and_atom_ids() 55 | self.assertCountEqual(failed_subjob_and_atoms, [(1, 1), (2, 1)]) 56 | -------------------------------------------------------------------------------- /test/integration/master/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/integration/master/__init__.py -------------------------------------------------------------------------------- /test/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/__init__.py -------------------------------------------------------------------------------- /test/unit/client/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/client/__init__.py -------------------------------------------------------------------------------- /test/unit/client/test_config_validator.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /test/unit/client/test_service_runner.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, call, ANY 2 | 3 | from app.client.service_runner import ServiceRunner, ServiceRunError 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | 6 | 7 | class TestServiceRunner(BaseUnitTestCase): 8 | 9 | def setUp(self): 10 | super().setUp() 11 | self.mock_Popen = self.patch('app.client.service_runner.Popen_with_delayed_expansion') 12 | self.mock_Network = self.patch('app.client.service_runner.Network') 13 | self.mock_time = self.patch('app.client.service_runner.time') 14 | 15 | def test_run_master_invokes_popen(self): 16 | self.mock_time.time.side_effect = range(1000) 17 | mock_network = self.mock_Network.return_value 18 | mock_network.get.return_value = Mock(ok=False) 19 | try: 20 | service_runner = ServiceRunner('frodo:1') 21 | service_runner.run_master() 22 | except ServiceRunError: 23 | pass 24 | 25 | self.assertEqual(call([ANY, ANY, 'master', '--port', '1'], stdout=ANY), self.mock_Popen.call_args) 26 | 27 | def test_run_master_does_not_invoke_popen_if_resp_is_ok(self): 28 | mock_network = self.mock_Network.return_value 29 | mock_network.get.return_value = Mock(ok=True) 30 | try: 31 | service_runner = ServiceRunner('frodo:1') 32 | service_runner.run_master() 33 | except ServiceRunError: 34 | pass 35 | 36 | assert not self.mock_Popen.called 37 | -------------------------------------------------------------------------------- /test/unit/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/common/__init__.py -------------------------------------------------------------------------------- /test/unit/common/test_build_artifact.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | from os.path import expanduser, join 3 | 4 | from app.common.build_artifact import BuildArtifact 5 | from app.util.conf.configuration import Configuration 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | @genty 9 | class TestBuildArtifact(BaseUnitTestCase): 10 | def setUp(self): 11 | super().setUp() 12 | Configuration['artifact_directory'] = expanduser('~') 13 | 14 | @genty_dataset( 15 | default=(join(expanduser('~'), '1', 'artifact_2_3'), 1, 2, 3), 16 | with_nondefault_root=(join('override', '1', 'artifact_2_3'), 1, 2, 3, join('override')), 17 | ) 18 | def test_atom_artifact_directory_returns_proper_artifact_path(self, expected_path, build_id, subjob_id=None, 19 | atom_id=None, result_root=None): 20 | self.assertEquals( 21 | expected_path, 22 | BuildArtifact.atom_artifact_directory(build_id, subjob_id, atom_id, result_root=result_root), 23 | 'The generated atom artifact directory is incorrect.' 
24 | ) 25 | 26 | @genty_dataset( 27 | default=(join(expanduser('~'), '1'), 1), 28 | with_nondefault_root=(join('override', '1'), 1, join('override')), 29 | ) 30 | def test_build_artifact_directory_returns_proper_artifact_path(self, expected_path, build_id, result_root=None): 31 | self.assertEquals( 32 | expected_path, 33 | BuildArtifact.build_artifact_directory(build_id, result_root=result_root), 34 | 'The generated build artifact directory is incorrect.' 35 | ) 36 | 37 | @genty_dataset( 38 | relative_path=('artifact_0_1', 0, 1), 39 | absolute_path=('/path/to/build/1/artifact_0_1', 0, 1), 40 | ) 41 | def test_subjob_and_atom_ids_parses_for_properly_formatted_directory(self, artifact_directory, expected_subjob_id, 42 | expected_atom_id): 43 | subjob_id, atom_id = BuildArtifact._subjob_and_atom_ids(artifact_directory) 44 | self.assertEquals(subjob_id, expected_subjob_id) 45 | self.assertEquals(atom_id, expected_atom_id) 46 | 47 | @genty_dataset( 48 | 'artifact_0', 49 | '/full/path/artifact_0', 50 | 'wrong_0_1', 51 | 'artifact_0_', 52 | ) 53 | def test_subjob_and_atom_ids_raises_value_error_with_incorrect_format(self, incorrect_artifact_directory): 54 | with self.assertRaises(ValueError): 55 | BuildArtifact._subjob_and_atom_ids(incorrect_artifact_directory) 56 | 57 | -------------------------------------------------------------------------------- /test/unit/common/test_cluster_service.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | import os 3 | 4 | from app.common.console_output_segment import ConsoleOutputSegment 5 | from app.common.cluster_service import ClusterService 6 | from app.util.exceptions import BadRequestError, ItemNotFoundError 7 | from test.framework.base_unit_test_case import BaseUnitTestCase 8 | 9 | 10 | @genty 11 | class TestClusterService(BaseUnitTestCase): 12 | def test_get_console_output_happy_path_returns_return_values(self): 13 | segment = ConsoleOutputSegment(offset_line=0, num_lines=1, total_num_lines=2, content='The content\n') 14 | self.patch('app.common.cluster_service.BuildArtifact').get_console_output.return_value = segment 15 | service = ClusterService() 16 | 17 | response = service.get_console_output(1, 2, 3, os.path.abspath('~')) 18 | 19 | self.assertDictEqual( 20 | response, 21 | { 22 | 'offset_line': 0, 23 | 'num_lines': 1, 24 | 'total_num_lines': 2, 25 | 'content': 'The content\n', 26 | }, 27 | 'The response dictionary did not contain the expected contents.' 
28 | ) 29 | 30 | @genty_dataset( 31 | zero_max_lines=(0, None), 32 | negative_max_lines=(-1, None), 33 | negative_offset_line=(1, -1), 34 | ) 35 | def test_get_console_output_raises_bad_request_error_with_invalid_arguments(self, max_lines, offset_line): 36 | service = ClusterService() 37 | 38 | with self.assertRaises(BadRequestError): 39 | service.get_console_output(1, 2, 3, os.path.abspath('~'), max_lines=max_lines, offset_line=offset_line) 40 | 41 | def test_get_console_output_raises_item_not_found_error_if_console_output_file_doesnt_exist(self): 42 | self.patch('app.common.cluster_service.BuildArtifact').get_console_output.return_value = None 43 | service = ClusterService() 44 | 45 | with self.assertRaises(ItemNotFoundError): 46 | service.get_console_output(1, 2, 3, os.path.abspath('~')) 47 | -------------------------------------------------------------------------------- /test/unit/deployment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/deployment/__init__.py -------------------------------------------------------------------------------- /test/unit/deployment/test_remote_master_service.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from app.deployment.remote_master_service import RemoteMasterService 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | 6 | 7 | class TestRemoteMasterService(BaseUnitTestCase): 8 | def setUp(self): 9 | super().setUp() 10 | self.patch('time.sleep') 11 | 12 | def test_start_and_block_until_up_raises_exception_if_process_fails_to_start(self): 13 | self._mock_shell_exec_command({ 14 | 'nohup some_path master --port 43000 &': "\n", 15 | 'ps ax | grep \'[s]ome_path\'': "\n", 16 | }) 17 | remote_master_service = RemoteMasterService('some_host', 'some_username', 'some_path') 18 | with self.assertRaisesRegex(SystemExit, '1'): 19 | remote_master_service.start_and_block_until_up(43000, 5) 20 | 21 | def test_start_and_block_until_up_raises_exception_if_process_starts_by_service_doesnt_respond(self): 22 | self._mock_shell_exec_command({ 23 | 'nohup some_path master --port 43000 &': "\n", 24 | 'ps ax | grep \'[s]ome_path\'': "\nsome_path\n", 25 | }) 26 | self.patch('app.deployment.remote_master_service.ServiceRunner').return_value.is_up.return_value = False 27 | remote_master_service = RemoteMasterService('some_host', 'some_username', 'some_path') 28 | with self.assertRaisesRegex(SystemExit, '1'): 29 | remote_master_service.start_and_block_until_up(43000, 5) 30 | 31 | def test_start_and_block_until_up_doesnt_raise_exception_if_master_service_is_up(self): 32 | self._mock_shell_exec_command({ 33 | 'nohup some_path master --port 43000 &': "\n", 34 | 'ps ax | grep \'[s]ome_path\'': "\nsome_path\n", 35 | }) 36 | self.patch('app.deployment.remote_master_service.ServiceRunner').return_value.is_up.return_value = True 37 | remote_master_service = RemoteMasterService('some_host', 'some_username', 'some_path') 38 | remote_master_service.start_and_block_until_up(43000, 5) 39 | 40 | def test_is_process_running_returns_false_if_only_empty_output(self): 41 | self._mock_shell_exec_command({'ps ax | grep \'[s]ome_command\'': "\n"}) 42 | remote_master_service = RemoteMasterService('some_host', 'some_username', 'some_path') 43 | self.assertFalse(remote_master_service._is_process_running('some_command')) 44 | 45 | def 
test_is_process_running_returns_true_if_found_non_empty_output(self): 46 | self._mock_shell_exec_command({'ps ax | grep \'[s]ome_command\'': "\nrealoutput\n"}) 47 | remote_master_service = RemoteMasterService('some_host', 'some_username', 'some_path') 48 | self.assertTrue(remote_master_service._is_process_running('some_command')) 49 | 50 | def _mock_shell_exec_command(self, command_response_dict): 51 | """ 52 | :param command_response_dict: a dictionary with the key being the expected input, and the value being the 53 | raw output that will be returned. 54 | :type command_response_dict: dict[str, str] 55 | """ 56 | def exec_command(*args, **kwargs): 57 | nonlocal command_response_dict 58 | if args[0] in command_response_dict: 59 | response_mock = Mock() 60 | response_mock.raw_output = command_response_dict[args[0]].encode('utf-8') 61 | response_mock.raw_error = None 62 | response_mock.returncode = 0 63 | return response_mock 64 | 65 | shell_client_mock = self.patch('app.util.shell.remote_shell_client.RemoteShellClient').return_value 66 | shell_client_mock.exec_command.side_effect = exec_command 67 | self.patch('app.deployment.remote_service.ShellClientFactory').create.return_value = shell_client_mock -------------------------------------------------------------------------------- /test/unit/deployment/test_remote_slave_service.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from unittest.mock import Mock 3 | 4 | from genty import genty, genty_dataset 5 | 6 | from app.deployment.remote_slave_service import RemoteSlaveService 7 | from app.util.shell.shell_client import ShellClient 8 | from test.framework.base_unit_test_case import BaseUnitTestCase 9 | 10 | 11 | @genty 12 | class TestRemoteSlaveService(BaseUnitTestCase): 13 | 14 | _HOST_NAME = socket.gethostname() 15 | 16 | def _patch_shell_client_factory(self): 17 | mock_shell_client_factory = self.patch('app.deployment.remote_service.ShellClientFactory') 18 | mock_shell_client = Mock(ShellClient) 19 | mock_shell_client_factory.create.return_value = mock_shell_client 20 | return mock_shell_client 21 | 22 | @genty_dataset( 23 | connect_to_master_host=('host1', 'username1', '/path/to/exec1', 'master_host', 43000, 43001, 10), 24 | connect_to_localhost=('host2', 'username2', '/path/to/exec2', 'localhost', 123, 321, 30), 25 | ) 26 | def test_start(self, host, username, executable_path, master_host, master_port, slave_port, num_executors): 27 | # Arrange 28 | mock_shell_client = self._patch_shell_client_factory() 29 | 30 | # Act 31 | remote_slave_service = RemoteSlaveService(host, username, executable_path) 32 | remote_slave_service.start(master_host, master_port, slave_port, num_executors) 33 | 34 | # Assert 35 | mock_shell_client.exec_command.assert_called_once_with( 36 | 'nohup {} slave --master-url {}:{} --port {} --num-executors {} &'.format( 37 | executable_path, 38 | master_host if master_host != 'localhost' else self._HOST_NAME, 39 | master_port, 40 | slave_port, 41 | num_executors, 42 | ), 43 | async=True, 44 | ) 45 | -------------------------------------------------------------------------------- /test/unit/master/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/master/__init__.py -------------------------------------------------------------------------------- /test/unit/master/test_atomizer.py: 
-------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from app.master.atomizer import Atomizer, AtomizerError 4 | from app.project_type.project_type import ProjectType 5 | from app.util.process_utils import get_environment_variable_setter_command 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | 9 | _FAKE_ATOMIZER_COMMAND = 'find . -name test_*.py' 10 | _FAKE_ATOMIZER_COMMAND_OUTPUT = '/tmp/test/directory/test_a.py\n/tmp/test/directory/test_b.py\n/tmp/test/directory/test_c.py\n' 11 | _SUCCESSFUL_EXIT_CODE = 0 12 | _FAILING_EXIT_CODE = 1 13 | 14 | 15 | class TestAtomizer(BaseUnitTestCase): 16 | def test_atomizer_returns_expected_atom_list(self): 17 | mock_project = Mock(spec=ProjectType) 18 | mock_project.execute_command_in_project.return_value = (_FAKE_ATOMIZER_COMMAND_OUTPUT, _SUCCESSFUL_EXIT_CODE) 19 | mock_project.project_directory = '/tmp/test/directory' 20 | 21 | atomizer = Atomizer([{'TEST_FILE': _FAKE_ATOMIZER_COMMAND}]) 22 | actual_atoms = atomizer.atomize_in_project(mock_project) 23 | actual_atom_commands = [atom.command_string for atom in actual_atoms] 24 | 25 | expected_atom_commands = [ 26 | get_environment_variable_setter_command('TEST_FILE', '$PROJECT_DIR/test_a.py'), 27 | get_environment_variable_setter_command('TEST_FILE', '$PROJECT_DIR/test_b.py'), 28 | get_environment_variable_setter_command('TEST_FILE', '$PROJECT_DIR/test_c.py'), 29 | ] 30 | self.assertListEqual(expected_atom_commands, actual_atom_commands, 31 | 'List of actual atoms should match list of expected atoms.') 32 | mock_project.execute_command_in_project.assert_called_once_with(_FAKE_ATOMIZER_COMMAND) 33 | 34 | def test_atomizer_raises_exception_when_atomize_command_fails(self): 35 | mock_project = Mock(spec=ProjectType) 36 | mock_project.execute_command_in_project.return_value = ('ERROR ERROR ERROR', _FAILING_EXIT_CODE) 37 | 38 | atomizer = Atomizer([{'TEST_FILE': _FAKE_ATOMIZER_COMMAND}]) 39 | with self.assertRaises(AtomizerError): 40 | atomizer.atomize_in_project(mock_project) 41 | 42 | mock_project.execute_command_in_project.assert_called_once_with(_FAKE_ATOMIZER_COMMAND) 43 | -------------------------------------------------------------------------------- /test/unit/master/test_build_request_handler.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from app.master.atomizer import AtomizerError 4 | from app.master.build_fsm import BuildState 5 | from app.master.build_request_handler import BuildRequestHandler 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | 9 | @genty 10 | class TestBuildRequestHandler(BaseUnitTestCase): 11 | @genty_dataset( 12 | no_subjobs=([], True), 13 | one_subjob=(['some subjob'], False), 14 | ) 15 | def test_prepare_build_async_calls_finish_only_if_no_subjobs(self, subjobs, build_finish_called): 16 | mock_project_lock = self.patch('threading.Lock').return_value 17 | build_scheduler_mock = self.patch('app.master.build_scheduler.BuildScheduler').return_value 18 | build_request_handler = BuildRequestHandler(build_scheduler_mock) 19 | build_mock = self.patch('app.master.build.Build').return_value 20 | build_mock.is_stopped = False 21 | build_mock.get_subjobs.return_value = subjobs 22 | 23 | build_request_handler._prepare_build_async(build_mock, mock_project_lock) 24 | 25 | if build_finish_called: 26 | build_mock.finish.assert_called_once_with() 27 | else: 28 | 
self.assertFalse(build_mock.finish.called) 29 | 30 | def test_prepare_build_async_does_not_call_finish_for_canceled_or_error_build(self): 31 | subjobs = [] 32 | mock_project_lock = self.patch('threading.Lock').return_value 33 | build_scheduler_mock = self.patch('app.master.build_scheduler.BuildScheduler').return_value 34 | build_request_handler = BuildRequestHandler(build_scheduler_mock) 35 | build_mock = self.patch('app.master.build.Build').return_value 36 | build_mock.is_stopped = True # this means the BuildState is CANCELED or ERROR 37 | build_mock.get_subjobs.return_value = subjobs 38 | 39 | build_request_handler._prepare_build_async(build_mock, mock_project_lock) 40 | 41 | self.assertFalse(build_mock.finish.called, 'Build finish should not be called for CANCELED build') 42 | 43 | @genty_dataset( 44 | no_subjobs=([],), 45 | one_subjob=(['some subjob'],), 46 | ) 47 | def test_prepare_build_async_does_not_call_mark_failed_for_canceled_build(self, subjobs): 48 | mock_project_lock = self.patch('threading.Lock').return_value 49 | build_scheduler_mock = self.patch('app.master.build_scheduler.BuildScheduler').return_value 50 | build_request_handler = BuildRequestHandler(build_scheduler_mock) 51 | build_mock = self.patch('app.master.build.Build').return_value 52 | build_mock.get_subjobs.return_value = subjobs 53 | build_mock.is_canceled = True 54 | build_mock.prepare.side_effect = AtomizerError 55 | 56 | build_request_handler._prepare_build_async(build_mock, mock_project_lock) 57 | 58 | self.assertFalse(build_mock.mark_failed.called, 'Build mark_failed should not be called for CANCELED build') 59 | -------------------------------------------------------------------------------- /test/unit/master/test_build_scheduler.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | from unittest.mock import Mock 3 | 4 | from app.master.build import Build 5 | from app.master.build_scheduler import BuildScheduler 6 | from app.master.build_scheduler_pool import BuildSchedulerPool 7 | from app.master.job_config import JobConfig 8 | from app.master.slave import Slave 9 | from app.master.subjob import Subjob 10 | from test.framework.base_unit_test_case import BaseUnitTestCase 11 | 12 | 13 | class TestBuildScheduler(BaseUnitTestCase): 14 | 15 | def _get_mock_build(self): 16 | mock_build = Mock(Build) 17 | mock_build.is_canceled = True 18 | config_mock = Mock(JobConfig, **{'max_executors': 20, 'max_executors_per_slave': 10}) 19 | mock_build.project_type.job_config.return_value = config_mock 20 | # We modify the protected member variable because the build_scheduler class 21 | # utilizes it directly. 
22 | mock_build._unstarted_subjobs = Queue(maxsize=10) 23 | return mock_build 24 | 25 | def test_execute_next_subjob_or_free_executor_with_canceled_build_frees_executor(self): 26 | # Arrange 27 | mock_build = self._get_mock_build() 28 | mock_build.is_canceled = True 29 | mock_slave = Mock(Slave, **{'num_executors': 10, 'id': 1}) 30 | 31 | # Act 32 | scheduler = BuildScheduler(mock_build, Mock(BuildSchedulerPool)) 33 | scheduler.allocate_slave(mock_slave) 34 | scheduler.execute_next_subjob_or_free_executor(mock_slave) 35 | 36 | # Assert 37 | mock_slave.free_executor.assert_called_once_with() 38 | 39 | def test_executor_or_free_with_canceled_build_tearsdown_and_unallocates_when_all_free(self): 40 | # Arrange 41 | mock_build = self._get_mock_build() 42 | mock_build.is_canceled = True 43 | mock_slave = Mock(Slave, **{'num_executors': 10, 'id': 1}) 44 | mock_slave.free_executor.return_value = 0 45 | 46 | # Act 47 | scheduler = BuildScheduler(mock_build, Mock(BuildSchedulerPool)) 48 | scheduler.allocate_slave(mock_slave) 49 | scheduler.execute_next_subjob_or_free_executor(mock_slave) 50 | 51 | # Assert 52 | mock_slave.free_executor.assert_called_once_with() 53 | mock_slave.teardown.assert_called_once_with() 54 | 55 | def test_execute_next_subjob_or_free_executor_with_no_unstarted_subjobs_frees_executors(self): 56 | # Arrange 57 | mock_build = self._get_mock_build() 58 | mock_build.is_canceled = False 59 | mock_build._unstarted_subjobs = Queue(maxsize=10) 60 | mock_slave = Mock(Slave, **{'num_executors': 10, 'id': 1}) 61 | 62 | # Act 63 | scheduler = BuildScheduler(mock_build, Mock(BuildSchedulerPool)) 64 | scheduler.allocate_slave(mock_slave) 65 | scheduler.execute_next_subjob_or_free_executor(mock_slave) 66 | 67 | # Assert 68 | mock_slave.free_executor.assert_called_once_with() 69 | 70 | def test_executor_or_free_starts_subjob_and_marks_build_in_progress(self): 71 | # Arrange 72 | mock_build = self._get_mock_build() 73 | mock_build.is_canceled = False 74 | mock_build._unstarted_subjobs = Queue(maxsize=10) 75 | mock_subjob = Mock(Subjob) 76 | mock_build._unstarted_subjobs.put(mock_subjob) 77 | mock_slave = Mock(Slave, **{'num_executors': 10, 'id': 1}) 78 | 79 | # Act 80 | scheduler = BuildScheduler(mock_build, Mock(BuildSchedulerPool)) 81 | scheduler.allocate_slave(mock_slave) 82 | scheduler.execute_next_subjob_or_free_executor(mock_slave) 83 | 84 | # Assert 85 | mock_slave.start_subjob.assert_called_once_with(mock_subjob) 86 | mock_subjob.mark_in_progress.assert_called_once_with(mock_slave) 87 | -------------------------------------------------------------------------------- /test/unit/master/test_job_config.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from app.master.job_config import JobConfig, ConfigValidationError 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | 6 | 7 | @genty 8 | class TestJobConfig(BaseUnitTestCase): 9 | 10 | @genty_dataset( 11 | {'atomizers': [{'TESTPATH': 'atomizer command'}]}, 12 | {'commands': ['shell command 1', 'shell command 2;']} 13 | ) 14 | def test_construct_from_dict_raise_error_without_requried_fields(self, config_dict): 15 | with self.assertRaises(ConfigValidationError): 16 | JobConfig.construct_from_dict('some_job_name', config_dict) 17 | 18 | def test_construct_from_dict_for_valid_conf_with_only_required_fields(self): 19 | config_dict = { 20 | 'commands': ['shell command 1', 'shell command 2;'], 21 | 'atomizers': [{'TESTPATH': 'atomizer 
command'}], 22 | } 23 | job_config = JobConfig.construct_from_dict('some_job_name', config_dict) 24 | 25 | self.assertEquals(job_config.command, 'shell command 1 && shell command 2') 26 | self.assertEquals(job_config.name, 'some_job_name') 27 | 28 | def test_construct_from_dict_for_valid_conf_with_all_fields(self): 29 | config_dict = { 30 | 'commands': ['shell command 1', 'shell command 2;'], 31 | 'atomizers': [{'TESTPATH': 'atomizer command'}], 32 | 'setup_build': ['setup command 1;', 'setup command 2;'], 33 | 'teardown_build': ['teardown command 1;', 'teardown command 2;'], 34 | 'max_executors': 100, 35 | 'max_executors_per_slave': 2, 36 | } 37 | job_config = JobConfig.construct_from_dict('some_job_name', config_dict) 38 | 39 | self.assertEquals(job_config.command, 'shell command 1 && shell command 2') 40 | self.assertEquals(job_config.name, 'some_job_name') 41 | self.assertEquals(job_config.setup_build, 'setup command 1 && setup command 2') 42 | self.assertEquals(job_config.teardown_build, 'teardown command 1 && teardown command 2') 43 | self.assertEquals(job_config.max_executors, 100) 44 | self.assertEquals(job_config.max_executors_per_slave, 2) 45 | -------------------------------------------------------------------------------- /test/unit/master/test_subjob.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | from app.master.atom import Atom, AtomState 3 | from app.master.job_config import JobConfig 4 | from app.master.subjob import Subjob 5 | from app.project_type.project_type import ProjectType 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | 9 | class TestSubjob(BaseUnitTestCase): 10 | 11 | def setUp(self): 12 | super().setUp() 13 | self._job_config_command = 'fake command' 14 | self._subjob = Subjob( 15 | build_id=12, 16 | subjob_id=34, 17 | project_type=Mock(spec_set=ProjectType), 18 | job_config=Mock(spec=JobConfig, command=self._job_config_command), 19 | atoms=[ 20 | Atom( 21 | 'export BREAKFAST="pancakes";', 22 | expected_time=23.4, 23 | actual_time=56.7, 24 | exit_code=1, 25 | state=AtomState.NOT_STARTED, 26 | atom_id=0, 27 | ), 28 | Atom( 29 | 'export BREAKFAST="cereal";', 30 | expected_time=89.0, 31 | actual_time=24.6, 32 | exit_code=0, 33 | state=AtomState.NOT_STARTED, 34 | atom_id=1, 35 | ), 36 | ], 37 | ) 38 | 39 | def test_subjob_constructor_sets_subjob_id_on_atoms(self): 40 | atoms = [Mock(), Mock()] 41 | Subjob(build_id=1, subjob_id=4, project_type=Mock(), job_config=Mock(), atoms=atoms) 42 | for atom in atoms: 43 | self.assertEqual(atom.subjob_id, 4) 44 | 45 | def test_api_representation_matches_expected(self): 46 | actual_api_repr = self._subjob.api_representation() 47 | 48 | expected_api_repr = { 49 | 'id': 34, 50 | 'command': self._job_config_command, 51 | 'slave': None, 52 | 'atoms': [ 53 | { 54 | 'id': 0, 55 | 'command_string': 'export BREAKFAST="pancakes";', 56 | 'expected_time': 23.4, 57 | 'actual_time': 56.7, 58 | 'exit_code': 1, 59 | 'state': 'NOT_STARTED', 60 | 'subjob_id': 34 61 | }, 62 | { 63 | 'id': 1, 64 | 'command_string': 'export BREAKFAST="cereal";', 65 | 'expected_time': 89.0, 66 | 'actual_time': 24.6, 67 | 'exit_code': 0, 68 | 'state': 'NOT_STARTED', 69 | 'subjob_id': 34 70 | }, 71 | ] 72 | } 73 | self.assertEqual(actual_api_repr, expected_api_repr, 'Actual api representation should match expected.') 74 | 75 | def _assert_atoms_are_in_state(self, api_repr, state_str): 76 | for atom_dict in api_repr['atoms']: 77 | self.assertEqual(atom_dict['state'], 
state_str) 78 | 79 | def test_mark_in_progress_marks_all_atoms_in_progress(self): 80 | self._subjob.mark_in_progress(None) 81 | actual_api_repr = self._subjob.api_representation() 82 | self._assert_atoms_are_in_state(actual_api_repr, 'IN_PROGRESS') 83 | 84 | def test_mark_completed_marks_all_atoms_completed(self): 85 | self._subjob.mark_completed() 86 | actual_api_repr = self._subjob.api_representation() 87 | self._assert_atoms_are_in_state(actual_api_repr, 'COMPLETED') 88 | -------------------------------------------------------------------------------- /test/unit/master/test_subjob_calculator.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from genty import genty, genty_dataset 4 | 5 | from app.master.atom import Atom 6 | from app.master.atomizer import Atomizer 7 | from app.master.job_config import JobConfig 8 | from app.master.subjob_calculator import compute_subjobs_for_build 9 | from app.project_type.project_type import ProjectType 10 | from test.framework.base_unit_test_case import BaseUnitTestCase 11 | 12 | 13 | @genty 14 | class TestSubjobCalculator(BaseUnitTestCase): 15 | @genty_dataset( 16 | atoms_override_specified=(['override1', 'override2'], None, False), 17 | atoms_override_not_specified=(None, [Atom('atom_value_1'), Atom('atom_value_2')], True), 18 | ) 19 | def test_compute_subjobs_for_build_only_atomizes_if_override_not_specified(self, atoms_override, atomizer_output, 20 | atomizer_called): 21 | """ 22 | :type atoms_override: list[str] | None 23 | :type atomizer_output: list[Atom] | None 24 | :type atomizer_called: bool 25 | """ 26 | self.patch('os.path.isfile').return_value = False 27 | mock_project = Mock(spec_set=ProjectType()) 28 | mock_project.atoms_override = atoms_override 29 | mock_project.timing_file_path.return_value = '/some/path/doesnt/matter' 30 | mock_project.project_directory = '/some/project/directory' 31 | mock_atomizer = Mock(spec_set=Atomizer) 32 | mock_atomizer.atomize_in_project.return_value = atomizer_output 33 | mock_job_config = Mock(spec=JobConfig) 34 | mock_job_config.name = 'some_config' 35 | mock_job_config.max_executors = 1 36 | mock_job_config.atomizer = mock_atomizer 37 | 38 | compute_subjobs_for_build(build_id=1, job_config=mock_job_config, project_type=mock_project) 39 | 40 | self.assertEquals(mock_atomizer.atomize_in_project.called, atomizer_called) 41 | -------------------------------------------------------------------------------- /test/unit/project_type/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/project_type/__init__.py -------------------------------------------------------------------------------- /test/unit/project_type/test_directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join, splitdrive 3 | 4 | from genty import genty, genty_dataset 5 | 6 | from app.project_type.directory import Directory 7 | from test.framework.base_unit_test_case import BaseUnitTestCase 8 | from app.util.conf.configuration import Configuration 9 | 10 | 11 | @genty 12 | class TestDirectory(BaseUnitTestCase): 13 | 14 | # os.getcwd() but without mount point or leading os.sep 15 | # e.g. 
'/var/bar' would become 'var/bar' on POSIX and 'c:\\temp\\foo' would become 'temp\\foo' 16 | _CWD_SYS_PATH_WITHOUT_SEP = splitdrive(os.getcwd())[1][len(os.sep):] 17 | _TIMINGS_DIR_SYS_PATH = join(os.getcwd(), 'var', 'besttimingserver') 18 | 19 | def setUp(self): 20 | super().setUp() 21 | Configuration['timings_directory'] = self._TIMINGS_DIR_SYS_PATH 22 | 23 | # Using `os.path.join` here instead of hard coding the path so the test is cross-platform. 24 | @genty_dataset( 25 | relative_project_dir=( 26 | join('my_code', 'a_smart_project'), 27 | 'UnitTests', 28 | join( 29 | _TIMINGS_DIR_SYS_PATH, 30 | _CWD_SYS_PATH_WITHOUT_SEP, 31 | 'my_code', 32 | 'a_smart_project', 33 | 'UnitTests.timing.json', 34 | ), 35 | ), 36 | absolute_project_dir=( 37 | join(os.getcwd(), 'Users', 'me', 'neato project'), 38 | 'Functional Tests', 39 | join( 40 | _TIMINGS_DIR_SYS_PATH, 41 | _CWD_SYS_PATH_WITHOUT_SEP, 42 | 'Users', 43 | 'me', 44 | 'neato project', 45 | 'Functional Tests.timing.json', 46 | ), 47 | ), 48 | ) 49 | def test_timing_file_path(self, project_directory, fake_job_name, expected_timing_file_path): 50 | directory_env = Directory(project_directory) 51 | actual_timing_file_path = directory_env.timing_file_path(fake_job_name) 52 | 53 | self.assertEqual(actual_timing_file_path, expected_timing_file_path) 54 | 55 | @genty_dataset( 56 | (True, False), 57 | (False, True), 58 | ) 59 | def test_fetch_project_raises_runtime_error_only_if_project_dir_does_not_exist( 60 | self, expect_dir_exists, 61 | expect_runtime_error, 62 | ): 63 | # Arrange 64 | directory_env = Directory(join(os.getcwd(), 'my_project')) 65 | mock_os_path_isdir = self.patch('os.path.isdir') 66 | mock_os_path_isdir.return_value = expect_dir_exists 67 | self.patch('app.project_type.directory.node') 68 | 69 | # Act & Assert 70 | if expect_runtime_error: 71 | with self.assertRaises(RuntimeError): 72 | directory_env._fetch_project() 73 | else: 74 | directory_env._fetch_project() 75 | -------------------------------------------------------------------------------- /test/unit/slave/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/slave/__init__.py -------------------------------------------------------------------------------- /test/unit/slave/test_subjob_executor.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, mock_open 2 | from os.path import expanduser, join 3 | 4 | from app.slave.subjob_executor import SubjobExecutor 5 | from app.util.conf.configuration import Configuration 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | 9 | class TestSubjobExecutor(BaseUnitTestCase): 10 | 11 | def test_configure_project_type_passes_project_type_params_and_calls_setup_executor(self): 12 | project_type_params = {'test': 'value'} 13 | util = self.patch('app.slave.subjob_executor.util') 14 | util.create_project_type = Mock(return_value=Mock()) 15 | executor = SubjobExecutor(1) 16 | 17 | executor.configure_project_type(project_type_params) 18 | 19 | util.create_project_type.assert_called_with(project_type_params) 20 | executor._project_type.setup_executor.assert_called_with() 21 | 22 | def test_configure_project_type_with_existing_project_type_calls_teardown(self): 23 | executor = SubjobExecutor(1) 24 | executor._project_type = Mock() 25 | self.patch('app.slave.subjob_executor.util') 26 | 27 | 
executor.configure_project_type({}) 28 | 29 | executor._project_type.teardown_executor.assert_called_once() 30 | 31 | def test_run_job_config_setup_calls_project_types_run_job_config_setup(self): 32 | executor = SubjobExecutor(1) 33 | executor._project_type = Mock() 34 | 35 | executor.run_job_config_setup() 36 | 37 | executor._project_type.run_job_config_setup.assert_called_with() 38 | 39 | def test_execute_subjob_passes_correct_build_executor_index_to_execute_command_in_project(self): 40 | Configuration['artifact_directory'] = expanduser('~') 41 | executor = SubjobExecutor(1) 42 | executor._project_type = Mock() 43 | executor._project_type.execute_command_in_project = Mock(return_value=(1, 2)) 44 | self.patch('app.slave.subjob_executor.fs_util') 45 | self.patch('app.slave.subjob_executor.shutil') 46 | output_file_mock = self.patch('app.slave.subjob_executor.open', new=mock_open(read_data=''), create=True).return_value 47 | os = self.patch('app.slave.subjob_executor.os') 48 | os.path = Mock() 49 | os.path.join = Mock(return_value='path') 50 | atomic_commands = ['command'] 51 | executor.id = 2 52 | expected_env_vars = { 53 | 'ARTIFACT_DIR': join(expanduser('~'), '1', 'artifact_2_0'), 54 | 'ATOM_ID': 0, 55 | 'EXECUTOR_INDEX': 2, 56 | 'MACHINE_EXECUTOR_INDEX': 2, 57 | 'BUILD_EXECUTOR_INDEX': 8 58 | } 59 | 60 | executor.execute_subjob(build_id=1, subjob_id=2, atomic_commands=atomic_commands, 61 | base_executor_index=6) 62 | 63 | executor._project_type.execute_command_in_project.assert_called_with('command', expected_env_vars, 64 | output_file=output_file_mock) 65 | -------------------------------------------------------------------------------- /test/unit/subcommands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/subcommands/__init__.py -------------------------------------------------------------------------------- /test/unit/test_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import basename, dirname, relpath 3 | 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | 6 | 7 | class TestTest(BaseUnitTestCase): 8 | """ 9 | This test class is a place for "meta-tests" that attempt to ensure that our tests are being run correctly. 10 | """ 11 | def test_all_test_subdirectories_have_init_py_file(self): 12 | # If a directory is missing an __init__.py, then tests in that directory will not be run! 13 | repo_test_dir_path = dirname(dirname(__file__)) 14 | self.assertEqual(basename(repo_test_dir_path), 'test', 'repo_test_dir_path should be the path of the top-level ' 15 | '"test" directory in the ClusterRunner repo.') 16 | 17 | exempt_dirs = ['__pycache__', '.hypothesis'] # skip special directories 18 | for dir_path, _, files in os.walk(repo_test_dir_path): 19 | if any(exempt_dir in dir_path for exempt_dir in exempt_dirs): 20 | continue 21 | 22 | self.assertIn( 23 | '__init__.py', files, 24 | 'The test directory "{}" does not appear to have an __init__.py file. 
This will prevent tests in that ' 25 | 'directory from running via "nosetests /test".'.format(relpath(dir_path, repo_test_dir_path))) 26 | -------------------------------------------------------------------------------- /test/unit/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/util/__init__.py -------------------------------------------------------------------------------- /test/unit/util/conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/util/conf/__init__.py -------------------------------------------------------------------------------- /test/unit/util/conf/test_base_config_loader.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from app.util.conf.base_config_loader import BaseConfigLoader, InvalidConfigError 4 | from app.util.conf.configuration import Configuration 5 | from test.framework.base_unit_test_case import BaseUnitTestCase 6 | 7 | 8 | class _FakeConfigLoader(BaseConfigLoader): 9 | def _get_config_file_whitelisted_keys(self): 10 | return ['some_bool', 'some_int', 'some_list', 'some_str'] 11 | 12 | def configure_defaults(self, conf): 13 | super().configure_defaults(conf) 14 | conf.set('some_bool', True) 15 | conf.set('some_int', 1776) 16 | conf.set('some_list', ['red', 'white', 'blue']) 17 | conf.set('some_str', 'America!') 18 | conf.set('some_nonwhitelisted_key', 1492) 19 | 20 | 21 | @genty 22 | class TestBaseConfigLoader(BaseUnitTestCase): 23 | 24 | @genty_dataset( 25 | bool_type=('some_bool', 'False', False), 26 | int_type=('some_int', '1999', 1999), 27 | list_type=('some_list', ['a', 'b', 'c'], ['a', 'b', 'c']), 28 | str_type=('some_str', 'OneTwoThree', 'OneTwoThree'), 29 | ) 30 | def test_all_datatypes_can_be_overridden_by_value_in_file(self, key, parsed_val, expected_stored_conf_val): 31 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 32 | mock_config_file.read_config_from_disk.return_value = {'general': {key: parsed_val}} 33 | config = Configuration.singleton() 34 | 35 | config_loader = _FakeConfigLoader() 36 | config_loader.configure_defaults(config) 37 | config_loader.load_from_config_file(config, config_filename='fake_filename') 38 | 39 | actual_stored_conf_val = Configuration[key] 40 | self.assertEqual(expected_stored_conf_val, actual_stored_conf_val, 41 | 'The configuration value for the key "{}" was expected to be {}:{}, but was {}:{}.'.format( 42 | key, type(expected_stored_conf_val), expected_stored_conf_val, 43 | type(actual_stored_conf_val), actual_stored_conf_val)) 44 | 45 | @genty_dataset( 46 | nonexistent_key=('some_nonexistent_key', '1999'), 47 | nonwhitelisted_key=('some_nonwhitelisted_key', '2001'), 48 | ) 49 | def test_error_is_raised_when_conf_file_contains_nonexistent_or_nonwhitelisted_key(self, key, parsed_val): 50 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 51 | mock_config_file.read_config_from_disk.return_value = {'general': {key: parsed_val}} 52 | config = Configuration.singleton() 53 | 54 | config_loader = _FakeConfigLoader() 55 | config_loader.configure_defaults(config) 56 | 57 | with self.assertRaises(InvalidConfigError): 58 | config_loader.load_from_config_file(config, 
config_filename='fake_filename') 59 | 60 | def test_list_type_conf_file_values_are_correctly_converted_to_lists(self): 61 | conf = Configuration.singleton() 62 | conf.set('some_list', ['localhost']) # The previous conf value determines the expected type: a list. 63 | conf_file_value = 'my-lonely-slave' # ConfigObj parses value to a string type if only one element is specified. 64 | 65 | config_loader = BaseConfigLoader() 66 | config_loader._cast_and_set('some_list', conf_file_value, conf) 67 | 68 | expected_conf_setting = [conf_file_value] 69 | self.assertListEqual(conf.get('some_list'), expected_conf_setting, 70 | 'The config loader should convert string values into single element lists for conf keys ' 71 | 'that are expected to be of type list.') 72 | -------------------------------------------------------------------------------- /test/unit/util/conf/test_configuration.py: -------------------------------------------------------------------------------- 1 | from app.util.conf.configuration import Configuration 2 | from test.framework.base_unit_test_case import BaseUnitTestCase 3 | 4 | 5 | class TestConfiguration(BaseUnitTestCase): 6 | 7 | _test_conf_values = { 8 | 'Family': 'Stegosauridae', 9 | 'Genus': 'Tuojiangosaurus', 10 | 'Species': 'multispinus', 11 | } 12 | 13 | def setUp(self): 14 | super().setUp() 15 | self.mock_base_config = self.patch('app.util.conf.base_config_loader.BaseConfigLoader') 16 | 17 | def test_conf_values_can_be_set_via_set_method(self): 18 | conf = Configuration.singleton() 19 | for conf_key, conf_value in self._test_conf_values.items(): 20 | conf.set(conf_key, conf_value) 21 | 22 | self._assert_conf_values_match_expected(self._test_conf_values) 23 | 24 | def test_conf_values_can_be_set_via_keyed_access(self): 25 | for conf_key, conf_value in self._test_conf_values.items(): 26 | Configuration[conf_key] = conf_value 27 | 28 | self._assert_conf_values_match_expected(self._test_conf_values) 29 | 30 | def _assert_conf_values_match_expected(self, expected_conf_values): 31 | conf = Configuration.singleton() 32 | for conf_key, expected_conf_value in expected_conf_values.items(): 33 | self.assertEqual(expected_conf_value, conf.get(conf_key), 34 | 'Actual conf value via get() should match expected.') 35 | self.assertEqual(expected_conf_value, Configuration[conf_key], 36 | 'Actual conf value via keyed access should match expected.') 37 | -------------------------------------------------------------------------------- /test/unit/util/conf/test_master_config_loader.py: -------------------------------------------------------------------------------- 1 | from app.util.conf.master_config_loader import MasterConfigLoader 2 | from app.util.conf.configuration import Configuration 3 | from test.framework.base_unit_test_case import BaseUnitTestCase 4 | 5 | 6 | class TestMasterConfigLoader(BaseUnitTestCase): 7 | 8 | def test_configure_default_sets_protocol_scheme_to_http(self): 9 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 10 | 11 | config = Configuration.singleton() 12 | config_loader = MasterConfigLoader() 13 | config_loader.configure_defaults(config) 14 | 15 | key = 'protocol_scheme' 16 | expected_stored_protocol_scheme_value = 'http' 17 | actual_stored_protocol_scheme_value = Configuration[key] 18 | 19 | self.assertEqual(expected_stored_protocol_scheme_value, actual_stored_protocol_scheme_value, 20 | 'The configuration value for the key "{}" was expected to be {}:{}, but was {}:{}.'.format( 21 | key, 
type(expected_stored_protocol_scheme_value), expected_stored_protocol_scheme_value, 22 | type(actual_stored_protocol_scheme_value), actual_stored_protocol_scheme_value)) 23 | 24 | def test_configure_postload_sets_protocol_scheme_to_https(self): 25 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 26 | mock_config_file.read_config_from_disk.return_value = {'general': {'https_cert_file': '/path/to/cert', 27 | 'https_key_file': '/path/to/key'}, 28 | 'master': {} 29 | } 30 | # Unpatch the method patched in BaseUnitTestCase, for only this test case. 31 | # load_from_config_file is needed to update the Configuration with the above "https_cert_file" and 32 | # "https_key_file" values. 33 | self.unpatch('app.util.conf.master_config_loader.MasterConfigLoader.load_from_config_file') 34 | 35 | config = Configuration.singleton() 36 | config_loader = MasterConfigLoader() 37 | config_loader.configure_defaults(config) 38 | config_loader.load_from_config_file(config, config_filename='fake_filename') 39 | config_loader.configure_postload(config) 40 | 41 | key = 'protocol_scheme' 42 | expected_stored_protocol_scheme_value = 'https' 43 | actual_stored_protocol_scheme_value = Configuration[key] 44 | 45 | self.assertEqual(expected_stored_protocol_scheme_value, actual_stored_protocol_scheme_value, 46 | 'The configuration value for the key "{}" was expected to be {}:{}, but was {}:{}.'.format( 47 | key, type(expected_stored_protocol_scheme_value), expected_stored_protocol_scheme_value, 48 | type(actual_stored_protocol_scheme_value), actual_stored_protocol_scheme_value)) 49 | -------------------------------------------------------------------------------- /test/unit/util/conf/test_slave_config_loader.py: -------------------------------------------------------------------------------- 1 | from app.util.conf.slave_config_loader import SlaveConfigLoader 2 | from app.util.conf.configuration import Configuration 3 | from test.framework.base_unit_test_case import BaseUnitTestCase 4 | 5 | 6 | class TestSlaveConfigLoader(BaseUnitTestCase): 7 | 8 | def test_configure_default_sets_protocol_scheme_to_http(self): 9 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 10 | 11 | config = Configuration.singleton() 12 | config_loader = SlaveConfigLoader() 13 | config_loader.configure_defaults(config) 14 | 15 | key = 'protocol_scheme' 16 | expected_stored_protocol_scheme_value = 'http' 17 | actual_stored_protocol_scheme_value = Configuration[key] 18 | 19 | self.assertEqual(expected_stored_protocol_scheme_value, actual_stored_protocol_scheme_value, 20 | 'The configuration value for the key "{}" was expected to be {}:{}, but was {}:{}.'.format( 21 | key, type(expected_stored_protocol_scheme_value), expected_stored_protocol_scheme_value, 22 | type(actual_stored_protocol_scheme_value), actual_stored_protocol_scheme_value)) 23 | 24 | def test_configure_postload_sets_protocol_scheme_to_https(self): 25 | mock_config_file = self.patch('app.util.conf.base_config_loader.ConfigFile').return_value 26 | mock_config_file.read_config_from_disk.return_value = {'general': {'https_cert_file': '/path/to/cert', 27 | 'https_key_file': '/path/to/key'}, 28 | 'slave': {} 29 | } 30 | 31 | config = Configuration.singleton() 32 | config_loader = SlaveConfigLoader() 33 | config_loader.configure_defaults(config) 34 | config_loader.load_from_config_file(config, config_filename='fake_filename') 35 | config_loader.configure_postload(config) 36 | 37 | key = 'protocol_scheme' 38 |
expected_stored_protocol_scheme_value = 'https' 39 | actual_stored_protocol_scheme_value = Configuration[key] 40 | 41 | self.assertEqual(expected_stored_protocol_scheme_value, actual_stored_protocol_scheme_value, 42 | 'The configuration value for the key "{}" was expected to be {}:{}, but was {}:{}.'.format( 43 | key, type(expected_stored_protocol_scheme_value), expected_stored_protocol_scheme_value, 44 | type(actual_stored_protocol_scheme_value), actual_stored_protocol_scheme_value)) 45 | -------------------------------------------------------------------------------- /test/unit/util/shell/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/util/shell/__init__.py -------------------------------------------------------------------------------- /test/unit/util/shell/test_factory.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from app.util.shell.shell_client_factory import ShellClientFactory 4 | from app.util.shell.local_shell_client import LocalShellClient 5 | from app.util.shell.remote_shell_client import RemoteShellClient 6 | from test.framework.base_unit_test_case import BaseUnitTestCase 7 | 8 | 9 | @genty 10 | class TestShellClientFactory(BaseUnitTestCase): 11 | @genty_dataset( 12 | local_shell=(LocalShellClient, 'localhost'), 13 | remote_shell=(RemoteShellClient, 'mordor') 14 | ) 15 | def test_create_returns_instance_of_expected(self, expected_class_type, host_name): 16 | shell_client = ShellClientFactory.create(host=host_name, user='sauron') 17 | self.assertEqual(expected_class_type, type(shell_client)) 18 | -------------------------------------------------------------------------------- /test/unit/util/shell/test_local_shell_client.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from test.framework.base_unit_test_case import BaseUnitTestCase 4 | from app.util.shell.local_shell_client import LocalShellClient 5 | from app.util.shell.shell_client import Response, EmptyResponse 6 | 7 | 8 | @genty 9 | class TestLocalShellClient(BaseUnitTestCase): 10 | 11 | _HOST = 'host' 12 | _USER = 'user' 13 | _SOURCE = 'source' 14 | _DESTINATION = 'destination' 15 | 16 | def setUp(self): 17 | super().setUp() 18 | self.mock_shutil = self.patch('app.util.shell.local_shell_client.shutil') 19 | self.mock_Popen = self.patch('app.util.shell.local_shell_client.Popen_with_delayed_expansion') 20 | 21 | @genty_dataset( 22 | empty_response=(True, EmptyResponse()), 23 | normal_response=(False, Response()) 24 | ) 25 | def test_exec_command_returns_expected_response(self, async, expected): 26 | self.create_mock_popen() 27 | client = LocalShellClient(self._HOST, self._USER) 28 | res = client.exec_command('ls', async=async) 29 | self.assertEqual(res, expected) 30 | 31 | def create_mock_popen(self, output=None, error=None, retcode=None): 32 | mock_popen = self.mock_Popen.return_value 33 | mock_popen.returncode = retcode 34 | mock_popen.communicate.return_value = output, error 35 | return mock_popen 36 | 37 | def test_copy_returns_expected_response(self): 38 | expected = Response(raw_output=self._DESTINATION.encode(), returncode=0) 39 | self.mock_shutil_copy_rval(self._DESTINATION) 40 | client = LocalShellClient(self._HOST, self._USER) 41 | res = client.copy(self._SOURCE, self._DESTINATION) 42 | 
self.assertEqual(res, expected) 43 | 44 | def mock_shutil_copy_rval(self, new_rval): 45 | self.mock_shutil.copy.return_value = new_rval 46 | -------------------------------------------------------------------------------- /test/unit/util/shell/test_remote_shell_client.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | 3 | from app.util.shell.remote_shell_client import RemoteShellClient 4 | from app.util.shell.shell_client import Response, EmptyResponse 5 | from test.framework.base_unit_test_case import BaseUnitTestCase 6 | 7 | 8 | @genty 9 | class TestRemoteShellClient(BaseUnitTestCase): 10 | def setUp(self): 11 | super().setUp() 12 | self.mock_Popen = self.patch('app.util.shell.remote_shell_client.Popen_with_delayed_expansion') 13 | 14 | @genty_dataset( 15 | normal_response=(False, Response(raw_output=b'\ncat', raw_error=b'\ndog', returncode=0)), 16 | async_response=(True, EmptyResponse()) 17 | ) 18 | def test_exec_command_returns_expected(self, async_enabled, response): 19 | self.mock_popen_communicate_call(stdout=b'\ncat', stderr=b'\ndog') 20 | client = RemoteShellClient('host', 'user') 21 | res = client.exec_command('ls', async=async_enabled) 22 | self.assertEqual(res, response) 23 | 24 | def mock_popen_communicate_call(self, stdout=b'\n', stderr=b'', returncode=0): 25 | mock_popen = self.mock_Popen.return_value 26 | mock_popen.communicate.return_value = stdout, stderr 27 | mock_popen.returncode = returncode 28 | -------------------------------------------------------------------------------- /test/unit/util/shell/test_shell_client.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset, genty_args 2 | 3 | from test.framework.base_unit_test_case import BaseUnitTestCase 4 | from app.util.shell.shell_client import ShellClient, Response 5 | 6 | 7 | @genty 8 | class TestShellClient(BaseUnitTestCase): 9 | 10 | _HOST = 'host' 11 | _USER = 'user' 12 | 13 | @genty_dataset( 14 | async_and_err_on_failure=(NotImplementedError, True, True) 15 | ) 16 | def test_exec_command_raises_expected_error(self, expected_error, async, error_on_failure): 17 | client = ShellClient(self._HOST, self._USER) 18 | with self.assertRaises(expected_error): 19 | client.exec_command('foo', async=async, error_on_failure=error_on_failure) 20 | 21 | @genty_dataset( 22 | successful_copy_with_error_on_failure=genty_args( 23 | source='source0', 24 | dest='dest0', 25 | error_on_failure=True, 26 | copy_successful=True, 27 | expect_runtime_error=False, 28 | ), 29 | failed_copy_with_error_on_failure=genty_args( 30 | source='source1', 31 | dest='dest1', 32 | error_on_failure=True, 33 | copy_successful=False, 34 | expect_runtime_error=True, 35 | ), 36 | failed_copy_without_error_on_failure=genty_args( 37 | source='source2', 38 | dest='dest2', 39 | error_on_failure=False, 40 | copy_successful=False, 41 | expect_runtime_error=False, 42 | ), 43 | successful_copy_without_error_on_failure=genty_args( 44 | source='source3', 45 | dest='dest3', 46 | error_on_failure=False, 47 | copy_successful=True, 48 | expect_runtime_error=False, 49 | ), 50 | ) 51 | def test_copy(self, source, dest, error_on_failure, copy_successful, expect_runtime_error): 52 | # Arrange 53 | client = ShellClient(self._HOST, self._USER) 54 | mock_copy_on_client = self.patch('app.util.shell.shell_client.ShellClient._copy_on_client') 55 | res = Response(returncode=0 if copy_successful else 1) 56 | 
mock_copy_on_client.return_value = res 57 | 58 | # Act 59 | if expect_runtime_error: 60 | with self.assertRaises(RuntimeError): 61 | client.copy(source, dest, error_on_failure) 62 | else: 63 | self.assertEqual(client.copy(source, dest, error_on_failure), res) 64 | 65 | # Assert 66 | mock_copy_on_client.assert_called_once_with(client, source, dest) 67 | -------------------------------------------------------------------------------- /test/unit/util/test_decorators.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import call 2 | 3 | from app.util.decorators import retry_on_exception_exponential_backoff 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | 6 | 7 | class TestDecorators(BaseUnitTestCase): 8 | 9 | def test_retry_on_exception_exponential_backoff_doesnt_sleep_if_no_exception(self): 10 | sleep_patch = self.patch('app.util.decorators.time.sleep') 11 | 12 | self.dummy_method_noop() 13 | 14 | self.assertFalse(sleep_patch.called) 15 | 16 | def test_retry_on_exception_exponential_backoff_doesnt_retry_if_incorrect_exception_type(self): 17 | sleep_patch = self.patch('app.util.decorators.time.sleep') 18 | 19 | with self.assertRaises(RuntimeError): 20 | self.dummy_method_raises_exception() 21 | 22 | self.assertFalse(sleep_patch.called) 23 | 24 | def test_retry_on_exception_exponential_backoff_retries_with_correct_sleep_durations(self): 25 | sleep_patch = self.patch('app.util.decorators.time.sleep') 26 | 27 | with self.assertRaises(Exception): 28 | self.dummy_method_always_raises_exception() 29 | 30 | self.assertEquals(sleep_patch.call_count, 4) 31 | sleep_patch.assert_has_calls([call(1), call(3), call(9), call(27)], any_order=False) 32 | 33 | def test_retry_on_exception_exponential_backoff_raise_error_if_initial_delay_is_not_positive(self): 34 | with self.assertRaises(RuntimeError): 35 | self.dummy_method_noop_with_initial_delay_zero() 36 | 37 | def test_retry_on_exception_exponential_backoff_raise_error_if_exponential_factor_is_less_than_one(self): 38 | with self.assertRaises(RuntimeError): 39 | self.dummy_method_noop_with_fraction_exponential_factor() 40 | 41 | @retry_on_exception_exponential_backoff(exceptions=(Exception,)) 42 | def dummy_method_noop(self): 43 | pass 44 | 45 | @retry_on_exception_exponential_backoff(exceptions=(NameError,)) 46 | def dummy_method_raises_exception(self): 47 | raise RuntimeError('Runtime error!') 48 | 49 | @retry_on_exception_exponential_backoff(exceptions=(Exception,), initial_delay=1, total_delay=30, 50 | exponential_factor=3) 51 | def dummy_method_always_raises_exception(self): 52 | # Retry times should be: 1, 3, 9, 27 53 | raise Exception('Exception!') 54 | 55 | @retry_on_exception_exponential_backoff(exceptions=(Exception,), initial_delay=0) 56 | def dummy_method_noop_with_initial_delay_zero(self): 57 | pass 58 | 59 | @retry_on_exception_exponential_backoff(exceptions=(Exception,), initial_delay=1, total_delay=14, 60 | exponential_factor=0.8) 61 | def dummy_method_noop_with_fraction_exponential_factor(self): 62 | pass 63 | -------------------------------------------------------------------------------- /test/unit/util/test_fs.py: -------------------------------------------------------------------------------- 1 | from app.util import fs 2 | from test.framework.base_unit_test_case import BaseUnitTestCase 3 | 4 | 5 | class TestFs(BaseUnitTestCase): 6 | 7 | def test_async_delete_calls_correct_commands(self): 8 | popen_mock = self.patch('app.util.fs.Popen_with_delayed_expansion') 
9 | move_mock = self.patch('shutil.move') 10 | self.patch('os.path.isdir').return_value = True 11 | mkdtemp_mock = self.patch('tempfile.mkdtemp') 12 | mkdtemp_mock.return_value = '/tmp/dir' 13 | fs.async_delete('/some/dir') 14 | 15 | move_mock.assert_called_with('/some/dir', '/tmp/dir') 16 | popen_mock.assert_called_with(['rm', '-rf', '/tmp/dir']) -------------------------------------------------------------------------------- /test/unit/util/test_process_utils.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset, genty_args 2 | 3 | from app.util.process_utils import Popen_with_delayed_expansion, get_environment_variable_setter_command 4 | 5 | from test.framework.base_unit_test_case import BaseUnitTestCase 6 | 7 | 8 | @genty 9 | class TestProcessUtils(BaseUnitTestCase): 10 | 11 | @genty_dataset( 12 | str_cmd_on_windows=( 13 | 'set FOO=1 && echo !FOO!', 14 | 'nt', 15 | ['cmd', '/V', '/C', 'set FOO=1 && echo !FOO!'], 16 | ), 17 | list_cmd_on_windows=( 18 | ['set', 'FOO=1', '&&', 'echo', '!FOO!'], 19 | 'nt', 20 | ['cmd', '/V', '/C', 'set', 'FOO=1', '&&', 'echo', '!FOO!'], 21 | ), 22 | str_cmd_on_posix=( 23 | 'export FOO=1; echo $FOO', 24 | 'posix', 25 | 'export FOO=1; echo $FOO', 26 | ), 27 | list_cmd_on_posix=( 28 | ['export', 'FOO=1;', 'echo', '$FOO'], 29 | 'posix', 30 | ['export', 'FOO=1;', 'echo', '$FOO'], 31 | ), 32 | ) 33 | def test_Popen_with_delayed_expansion(self, input_cmd, os_name, expected_final_cmd): 34 | # Arrange 35 | mock_os = self.patch('app.util.process_utils.os') 36 | mock_os.name = os_name 37 | mock_subprocess_popen = self.patch('subprocess.Popen') 38 | 39 | # Act 40 | Popen_with_delayed_expansion(input_cmd) 41 | 42 | # Assert 43 | self.assertEqual(len(mock_subprocess_popen.call_args_list), 1) 44 | (cmd,), _ = mock_subprocess_popen.call_args 45 | self.assertEqual(cmd, expected_final_cmd) 46 | 47 | @genty_dataset( 48 | windows=genty_args( 49 | name='FOO', 50 | value='1', 51 | os_name='nt', 52 | expected_command='set FOO=1&&', 53 | ), 54 | posix=genty_args( 55 | name='BAR', 56 | value='2', 57 | os_name='posix', 58 | expected_command='export BAR="2";', 59 | ), 60 | ) 61 | def test_get_environment_variable_setter_command(self, name, value, os_name, expected_command): 62 | # Arrange 63 | mock_os = self.patch('app.util.process_utils.os') 64 | mock_os.name = os_name 65 | 66 | # Act 67 | command = get_environment_variable_setter_command(name, value) 68 | 69 | # Assert 70 | self.assertEqual(command, expected_command) 71 | -------------------------------------------------------------------------------- /test/unit/util/test_safe_thread.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from test.framework.base_unit_test_case import BaseUnitTestCase 4 | from app.util.unhandled_exception_handler import UnhandledExceptionHandler 5 | from app.util.safe_thread import SafeThread 6 | 7 | 8 | class TestSafeThread(BaseUnitTestCase): 9 | 10 | def test_exception_on_safe_thread_calls_teardown_callbacks(self): 11 | my_awesome_teardown_callback = MagicMock() 12 | unhandled_exception_handler = UnhandledExceptionHandler.singleton() 13 | unhandled_exception_handler.add_teardown_callback(my_awesome_teardown_callback, 'fake arg', fake_kwarg='boop') 14 | 15 | def my_terrible_method(): 16 | raise Exception('Sic semper tyrannis!') 17 | 18 | thread = SafeThread(target=my_terrible_method) 19 | thread.start() 20 | thread.join() 21 | 22 |
my_awesome_teardown_callback.assert_called_once_with('fake arg', fake_kwarg='boop') 23 | 24 | def test_normal_execution_on_safe_thread_does_not_call_teardown_callbacks(self): 25 | my_lonely_teardown_callback = MagicMock() 26 | unhandled_exception_handler = UnhandledExceptionHandler.singleton() 27 | unhandled_exception_handler.add_teardown_callback(my_lonely_teardown_callback) 28 | 29 | def my_fantastic_method(): 30 | print('Veritas vos liberabit!') 31 | 32 | thread = SafeThread(target=my_fantastic_method) 33 | thread.start() 34 | thread.join() 35 | 36 | self.assertFalse(my_lonely_teardown_callback.called, 37 | 'The teardown callback should not be called unless an exception is raised.') 38 | -------------------------------------------------------------------------------- /test/unit/util/test_secret.py: -------------------------------------------------------------------------------- 1 | from genty import genty, genty_dataset 2 | import hashlib 3 | 4 | from test.framework.base_unit_test_case import BaseUnitTestCase 5 | from app.util.secret import Secret 6 | 7 | 8 | @genty 9 | class TestSecret(BaseUnitTestCase): 10 | 11 | def test_get_secret_should_return_set_secret(self): 12 | secret = 'secret1234' 13 | Secret.set(secret) 14 | self.assertEqual(secret, Secret.get()) 15 | 16 | @genty_dataset( 17 | no_secret=(None,), 18 | empty_secret=('',), 19 | short_secret=('short',), 20 | null_hash_secret=('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce' 21 | '47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e',), 22 | ) 23 | def test_set_insecure_secrets_fails(self, insecure_secret): 24 | self.assertRaises(RuntimeError, Secret.set, insecure_secret) 25 | 26 | def test_header_generates_128_character_digest(self): 27 | secret = hashlib.sha512().hexdigest() 28 | header = Secret.header('message', secret) 29 | self.assertEqual(len(header[Secret.DIGEST_HEADER_KEY]), 128) 30 | 31 | def test_matching_digests_should_return_true(self): 32 | secret = 'secrettoken' 33 | message = 'message blah blah horse battery staple' 34 | Secret.set(secret) 35 | digest_received = Secret._get_hex_digest(message, secret) 36 | 37 | self.assertTrue(Secret.digest_is_valid(digest_received, message)) 38 | 39 | def test_non_matching_digests_should_return_false(self): 40 | secret = 'secrettoken' 41 | message = 'message blah blah horse battery staple' 42 | Secret.set(secret) 43 | digest_received = Secret._get_hex_digest('not the original message', secret) 44 | 45 | self.assertFalse(Secret.digest_is_valid(digest_received, message)) 46 | 47 | def test_unspecified_digest_should_return_false(self): 48 | secret = 'secrettoken' 49 | message = 'message blah blah horse battery staple' 50 | Secret.set(secret) 51 | digest_received = None 52 | 53 | self.assertFalse(Secret.digest_is_valid(digest_received, message)) 54 | -------------------------------------------------------------------------------- /test/unit/util/test_session_id.py: -------------------------------------------------------------------------------- 1 | from test.framework.base_unit_test_case import BaseUnitTestCase 2 | from app.util.session_id import SessionId 3 | 4 | 5 | class TestSessionId(BaseUnitTestCase): 6 | def test_get_should_return_same_string_on_repeated_calls(self): 7 | session_id = SessionId.get() 8 | self.assertEquals(session_id, SessionId.get()) 9 | -------------------------------------------------------------------------------- /test/unit/util/test_single_use_coin.py: 
-------------------------------------------------------------------------------- 1 | from test.framework.base_unit_test_case import BaseUnitTestCase 2 | from app.util.single_use_coin import SingleUseCoin 3 | 4 | 5 | class TestSingleUseCoin(BaseUnitTestCase): 6 | 7 | def test_coin_spend_returns_true_only_once(self): 8 | coin = SingleUseCoin() 9 | 10 | self.assertTrue(coin.spend(), 'First call to spend() should return True.') 11 | self.assertFalse(coin.spend(), 'Subsequent calls to spend() should return False.') 12 | self.assertFalse(coin.spend(), 'Subsequent calls to spend() should return False.') 13 | -------------------------------------------------------------------------------- /test/unit/util/test_singleton.py: -------------------------------------------------------------------------------- 1 | from test.framework.base_unit_test_case import BaseUnitTestCase 2 | from app.util.singleton import Singleton, SingletonError 3 | 4 | 5 | class TestSingleton(BaseUnitTestCase): 6 | 7 | def test_singleton_returns_same_instance_every_time(self): 8 | instance_a = Singleton.singleton() 9 | instance_b = Singleton.singleton() 10 | self.assertIs(instance_a, instance_b, 'Singleton.singleton() should return the same instance.') 11 | 12 | def test_singleton_raises_error_on_multiple_instantiations(self): 13 | instance_a = Singleton.singleton() 14 | 15 | with self.assertRaises(SingletonError, msg='Instantiating more than once should raise an error.'): 16 | instance_b = Singleton() 17 | 18 | def test_singletons_can_be_reset(self): 19 | instance_a = Singleton.singleton() 20 | Singleton.reset_singleton() 21 | instance_b = Singleton.singleton() 22 | 23 | self.assertTrue(instance_a is not instance_b, 24 | 'Singleton.singleton() should return a different instance after reset.') 25 | -------------------------------------------------------------------------------- /test/unit/util/test_url_builder.py: -------------------------------------------------------------------------------- 1 | from app.util.url_builder import UrlBuilder 2 | from test.framework.base_unit_test_case import BaseUnitTestCase 3 | 4 | 5 | class TestUrlBuilder(BaseUnitTestCase): 6 | 7 | def test_url_should_generate_correct_url(self): 8 | host = 'master:9000' 9 | first, second, third = 'first', 'second', 'third' 10 | builder = UrlBuilder(host) 11 | url = builder.url(first, second, third) 12 | self.assertEqual('http://{}/v1/{}/{}/{}'.format(host, first, second, third), url, 13 | 'Url generated did not match expectation') 14 | -------------------------------------------------------------------------------- /test/unit/web_framework/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/box/ClusterRunner/55d18016f2c7d2dbb8aec5879459cae654edb045/test/unit/web_framework/__init__.py -------------------------------------------------------------------------------- /test/unit/web_framework/test_route_node.py: -------------------------------------------------------------------------------- 1 | from app.web_framework import route_node as node 2 | from test.framework.base_unit_test_case import BaseUnitTestCase 3 | 4 | 5 | class TestRouteNode(BaseUnitTestCase): 6 | 7 | def get_nested_route_tree(self): 8 | root_route = \ 9 | node.RouteNode(r'/', _ExampleHandler).add_children([ 10 | node.RouteNode(r'widget', _ExampleHandler, 'widgets').add_children([ 11 | node.RouteNode(r'(\d+)', _ExampleHandler, 'widget').add_children([ 12 | node.RouteNode(r'start', _ExampleHandler), 13 | node.RouteNode(r'end', 
_ExampleHandler) 14 | ]) 15 | ]) 16 | ]) 17 | return root_route 18 | 19 | def test_nested_route_should_generate_multipart_regex(self): 20 | root_route = self.get_nested_route_tree() 21 | inner_route = root_route.children[0].children[0].children[0] 22 | self.assertEqual('/widget/(\d+)/start/?', inner_route.regex(), 'Generated regex does not match expectation.') 23 | 24 | def test_nested_route_should_generate_multipart_template(self): 25 | root_route = self.get_nested_route_tree() 26 | inner_route = root_route.children[0].children[0].children[0] 27 | self.assertEqual('/widget/[widget_id]/start', inner_route.route_template(), 28 | 'Generated route template does not match expectation.') 29 | 30 | def test_ancestors_should_return_parents_recursively(self): 31 | root_route = self.get_nested_route_tree() 32 | inner_route = root_route.children[0].children[0].children[0] 33 | ancestors = inner_route.ancestors() 34 | ancestor_names = [ancestor.name() for ancestor in ancestors] 35 | self.assertEqual(['[widget_id]', 'widget', '/'], ancestor_names, 36 | 'The list of ancestors returned does not match the list of recursive parents.') 37 | 38 | def test_descendants_should_return_all_children_recursively(self): 39 | root_route = self.get_nested_route_tree() 40 | descendants = root_route.descendants() 41 | descendant_names = [descendant.name() for descendant in descendants] 42 | self.assertEqual(['widget', '[widget_id]', 'start', 'end'], descendant_names, 43 | 'Descendants did not return the list of all children recursively') 44 | 45 | 46 | class _ExampleHandler(object): 47 | def get(self, widget_id): 48 | pass 49 | -------------------------------------------------------------------------------- /windows/deploy/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: stop download start start_master connect_slaves check test 2 | BASE_COMMAND := ansible-playbook deploy_clusterrunner.yml -i hosts 3 | 4 | deploy: 5 | $(BASE_COMMAND) 6 | 7 | stop: 8 | $(BASE_COMMAND) --tags "stop" 9 | 10 | download: 11 | $(BASE_COMMAND) --tags "download" 12 | 13 | start: 14 | $(BASE_COMMAND) --tags "start" 15 | 16 | start_master: 17 | $(BASE_COMMAND) --tags "start_master" 18 | 19 | connect_slaves: 20 | $(BASE_COMMAND) --tags "connect_slaves" 21 | 22 | check: 23 | $(BASE_COMMAND) --tags "check" 24 | 25 | #test: 26 | # ~/.clusterrunner/dist/clusterrunner build --master-url {YOUR_MASTER_HOST}:43000 git --url https://github.com/aptxkid/ClusterRunnerDemo.git --job-name Simple 27 | -------------------------------------------------------------------------------- /windows/deploy/README.md: -------------------------------------------------------------------------------- 1 | # Deploy ClusterRunner on Windows 2 | 3 | We provide an option to deploy ClusterRunner on Windows via [Ansible](http://docs.ansible.com/ansible/intro.html). 4 | 5 | ## Prerequisites 6 | 7 | ### Prepare Your Machine 8 | 9 | - Install Ansible on your machine (not the Windows machines that will be running ClusterRunner). More about installing Ansible can be found [here](http://docs.ansible.com/ansible/intro_installation.html). 10 | - (optional) Install ClusterRunner on your machine (Linux or Mac). This is handy if you want to send jobs to ClusterRunner from your machine. 11 | 12 | ### Prepare the Windows Machines 13 | 14 | Ansible requires the Windows machines to meet certain requirements in order to work. Specifically: 15 | - Install PowerShell 4.0 or higher.
16 | - Install [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-us/download/details.aspx?id=5555). 17 | - Run the [ConfigureRemotingForAnsible.ps1](https://github.com/ansible/ansible/blob/devel/examples/scripts/ConfigureRemotingForAnsible.ps1) on the Windows machines. 18 | - More details about preparing the Windows machines for Ansible can be found [here](http://docs.ansible.com/ansible/intro_windows.html). 19 | 20 | ## Configure Ansible for Your Environment 21 | 22 | - Override files/clusterrunner.conf with the conf file that you want to deploy to all the Windows cluster nodes. 23 | - Edit group\_vars/clusterrunner\_nodes.yml with the proper username/password for your Windows nodes. 24 | - Edit the hosts file with information about the cluster (which node will be the master, which nodes will be the slaves, etc.). 25 | 26 | ## Deploy! 27 | 28 | Type 29 | ```bash 30 | make deploy 31 | ``` 32 | and enjoy ClusterRunner on Windows! 33 | -------------------------------------------------------------------------------- /windows/deploy/deploy_clusterrunner.yml: -------------------------------------------------------------------------------- 1 | - name: Stop ClusterRunner 2 | hosts: clusterrunner_nodes 3 | tags: 4 | - stop 5 | 6 | tasks: 7 | - name: Stop existing clusterrunner service 8 | raw: '{{home}}\.clusterrunner\dist\clusterrunner.exe stop' 9 | ignore_errors: true 10 | 11 | - name: Download ClusterRunner 12 | hosts: clusterrunner_nodes 13 | tags: 14 | - download 15 | 16 | tasks: 17 | - name: Ensure the .clusterrunner directory exists 18 | win_file: path={{home}}\.clusterrunner state=directory 19 | 20 | - name: Download clusterrunner 21 | win_get_url: 22 | url: 'https://cloud.box.com/shared/static/34xqrf342071xvn75gu9vbwuodlp9adq.zip' 23 | dest: '{{home}}\.clusterrunner\clusterrunner.zip' 24 | 25 | - name: Download 7-zip 26 | win_get_url: 27 | url: 'https://chocolatey.org/7za.exe' 28 | dest: '{{home}}\.clusterrunner\7za.exe' 29 | 30 | - name: Unzip clusterrunner.zip 31 | raw: '{{home}}\.clusterrunner\7za.exe x -y -o{{home}}\.clusterrunner {{home}}\.clusterrunner\clusterrunner.zip' 32 | 33 | - name: Copy clusterrunner.conf 34 | win_copy: src=files/clusterrunner.conf dest={{home}}\.clusterrunner\clusterrunner.conf 35 | 36 | - name: Start ClusterRunner Master 37 | hosts: master 38 | tags: 39 | - start 40 | - start_master 41 | 42 | tasks: 43 | - name: Start ClusterRunner Master 44 | script: files/run_process_nohup.ps1 "{{home}}\.clusterrunner\dist\clusterrunner.exe master" 45 | - pause: seconds=5 46 | 47 | - name: Connect ClusterRunner Slaves 48 | hosts: slaves 49 | tags: 50 | - start 51 | - connect_slave 52 | 53 | tasks: 54 | - name: Connect ClusterRunner slaves 55 | script: files/run_process_nohup.ps1 "{{home}}\.clusterrunner\dist\clusterrunner.exe slave -m {{master}}:43000 -e 10" 56 | 57 | - name: Check connection 58 | hosts: slaves 59 | tags: 60 | - check 61 | 62 | tasks: 63 | - name: Check all slaves are connected 64 | local_action: shell curl http://{{master}}:43000/v1/slave 65 | register: curl_output 66 | failed_when: "'{{inventory_hostname}}' not in curl_output.stdout" 67 | -------------------------------------------------------------------------------- /windows/deploy/files/clusterrunner.conf: -------------------------------------------------------------------------------- 1 | ## This file contains the default values for common configuration settings.
ClusterRunner expects a 2 | ## clusterrunner.conf file in the user's ~/.clusterrunner directory (or a location specified on the command line), 3 | ## and it will copy this file to ~/.clusterrunner/clusterrunner.conf if that doesn't exist. Uncomment settings to 4 | ## change their defaults. Settings which are specified via the command line (e.g. clusterrunner master --port 43777) 5 | ## will override these values. 6 | 7 | [general] 8 | secret = YOUR_SECRET # this is just a demo conf file. You should override this file with the conf you want to deploy to your Windows cluster 9 | ## The root directory for files used during the build process. 10 | # base_directory = ~/.clusterrunner 11 | 12 | ## Symlinks to each build's project files are created here, to keep paths consistent across machines. 13 | # build_symlink_directory = /tmp/clusterrunner_build_symlinks 14 | 15 | ## The level to log at. Other options are DEBUG, INFO, NOTICE, WARNING, ERROR, and CRITICAL. 16 | # log_level = 'WARNING' 17 | 18 | ## A list of slaves, used for starting slaves with the "clusterrunner deploy" command 19 | # slaves = hostname01.example.com, hostname02.example.com, hostname03.example.com 20 | 21 | ## The hostname to refer to the local machine with 22 | # hostname = localhost 23 | 24 | ## Should we automatically reject all git remote operations on hosts that are not in known_hosts? 25 | # git_strict_host_key_checking = False 26 | 27 | ## CORS support - a regex to match against allowed API request origins, or None to disable CORS 28 | # cors_allowed_origins_regex = None 29 | 30 | ## Should the slaves get the project from master or not 31 | get_project_from_master = False 32 | 33 | [master] 34 | ## The port the master service will run on 35 | # port = 43000 36 | 37 | [slave] 38 | ## The port the slave service will run on 39 | # port = 43001 40 | 41 | ## The maximum number of parallel executions to run on this slave 42 | # num_executors = 1 43 | 44 | ## The master's hostname this slave will connect to 45 | # master_hostname = localhost 46 | 47 | ## The master's port this slave will connect to 48 | # master_port = 43000 49 | -------------------------------------------------------------------------------- /windows/deploy/files/run_process_nohup.ps1: -------------------------------------------------------------------------------- 1 | Invoke-WmiMethod -path win32_process -name create -argumentlist "$args" 2 | -------------------------------------------------------------------------------- /windows/deploy/group_vars/clusterrunner_nodes.yml: -------------------------------------------------------------------------------- 1 | ansible_ssh_user: your_windows_user_name 2 | ansible_ssh_pass: password_for_the_windows_user 3 | ansible_ssh_port: 5986 4 | ansible_connection: winrm 5 | -------------------------------------------------------------------------------- /windows/deploy/hosts: -------------------------------------------------------------------------------- 1 | [master] 2 | # put the ClusterRunner master host here 3 | # your.windows.master.host.com 4 | 5 | [slaves] 6 | # list out all the ClusterRunner slave hosts here 7 | # your.windows.slave.01.com 8 | # your.windows.slave.02.com 9 | 10 | [clusterrunner_nodes:children] 11 | master 12 | slaves 13 | 14 | [clusterrunner_nodes:vars] 15 | # path of HOME directory on all the VMs 16 | # home=c:\users\your-windows-user 17 | 18 | # ClusterRunner master host, this should be the same as the one under 'master' inventory group 19 | # master=your.windows.master.host.com
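For illustration only, here is how the hosts inventory and group_vars/clusterrunner_nodes.yml above might look once filled in for a hypothetical one-master, two-slave cluster; the hostnames, user name, password, and home path are made-up values, not part of the repository:

```
# hosts (hypothetical values)
[master]
winbuild01.example.com

[slaves]
winbuild02.example.com
winbuild03.example.com

[clusterrunner_nodes:children]
master
slaves

[clusterrunner_nodes:vars]
home=c:\users\builduser
master=winbuild01.example.com
```

```yaml
# group_vars/clusterrunner_nodes.yml (hypothetical values)
ansible_ssh_user: builduser
ansible_ssh_pass: builduser_password
ansible_ssh_port: 5986
ansible_connection: winrm
```

With values like these in place, running `make deploy` from windows/deploy invokes `ansible-playbook deploy_clusterrunner.yml -i hosts` against all three nodes.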
-------------------------------------------------------------------------------- /windows/install.ps1: -------------------------------------------------------------------------------- 1 | # variables 2 | $url = "https://cloud.box.com/shared/static/u91zg1fmmlxo2reqo8mwi3hjv7kaeazz.zip" 3 | if ($env:TEMP -eq $null) { 4 | $env:TEMP = Join-Path $env:SystemDrive 'temp' 5 | } 6 | $crTempDir = Join-Path $env:TEMP "clusterrunner" 7 | $tempDir = Join-Path $crTempDir "crInstall" 8 | if (![System.IO.Directory]::Exists($tempDir)) {[System.IO.Directory]::CreateDirectory($tempDir)} 9 | $file = Join-Path $tempDir "clusterrunner.zip" 10 | 11 | function Download-File { 12 | param ( 13 | [string]$url, 14 | [string]$file 15 | ) 16 | Write-Host "Downloading $url to $file" 17 | $downloader = new-object System.Net.WebClient 18 | $downloader.Proxy.Credentials=[System.Net.CredentialCache]::DefaultNetworkCredentials; 19 | $downloader.DownloadFile($url, $file) 20 | } 21 | 22 | # download the package 23 | Download-File $url $file 24 | 25 | # download 7zip 26 | Write-Host "Download 7Zip commandline tool" 27 | $7zaExe = Join-Path $tempDir '7za.exe' 28 | 29 | Download-File 'https://chocolatey.org/7za.exe' "$7zaExe" 30 | 31 | # unzip the package 32 | $targetDir = Join-Path $env:userprofile ".clusterrunner" 33 | if (![System.IO.Directory]::Exists($targetDir)) {[System.IO.Directory]::CreateDirectory($targetDir)} 34 | Write-Host "Extracting $file to .clusterrunner" 35 | Start-Process "$7zaExe" -ArgumentList "x -o`"$targetDir`" -y `"$file`"" -Wait -NoNewWindow 36 | $defaultConf = $targetDir, "dist", "conf", "default_clusterrunner.conf" -join "\" 37 | $targetConf = Join-path $targetDir "clusterrunner.conf" 38 | Copy-Item $defaultConf $targetConf 39 | -------------------------------------------------------------------------------- /windows/run_cr_unit_on_cr.cmd: -------------------------------------------------------------------------------- 1 | python -m app build --job-name UnitOnWindows 2 | python -m app stop 3 | --------------------------------------------------------------------------------