├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── circleci.yml
├── example-jobs
│   ├── DEFAULTS.yaml
│   ├── other.yaml
│   └── test.yaml
├── processor
│   ├── python.py
│   └── requirements.txt
└── scripts
    ├── kubernetes_runner
    ├── runner
    ├── start
    └── verify_kubectl
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.jobs
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:stable
MAINTAINER shanestarcher@gmail.com

# Docker Hub does not support Docker 1.9 yet; change back to ARG once it does: https://github.com/docker/hub-feedback/issues/460
ENV DOCKERIZE_VERSION=0.2.0
ENV KUBERNETES_VERSION=1.4.5
ENV KOMPOSE_VERSION=0.1.1

RUN \
    apt-get update && \
    apt-get install -y curl cron python-pip

RUN \
    mkdir -p /usr/local/bin/ &&\
    curl -SL https://github.com/jwilder/dockerize/releases/download/v${DOCKERIZE_VERSION}/dockerize-linux-amd64-v${DOCKERIZE_VERSION}.tar.gz \
    | tar xzC /usr/local/bin

RUN \
    curl -SL https://github.com/kubernetes-incubator/kompose/releases/download/v${KOMPOSE_VERSION}/kompose_linux-amd64.tar.gz \
    | tar xzC /usr/local/bin/ &&\
    mv /usr/local/bin/kompose*/kompose /usr/local/bin &&\
    rm -rf /usr/local/bin/kompose_linux-amd64

RUN \
    curl -SL https://storage.googleapis.com/kubernetes-release/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl &&\
    chmod +x /usr/local/bin/kubectl


RUN mkdir -p /app/lockers
RUN curl -SL -o /app/lockers/cronsul-cleanup https://raw.githubusercontent.com/EvanKrall/cronsul/master/cronsul-cleanup &&\
    chmod +x /app/lockers/cronsul-cleanup
RUN curl -SL -o /app/lockers/cronsul https://raw.githubusercontent.com/EvanKrall/cronsul/master/cronsul &&\
    chmod +x /app/lockers/cronsul


ADD processor /app/processor
RUN pip install -r /app/processor/requirements.txt

ADD scripts/* /app/


ONBUILD ADD jobs /app/jobs
ONBUILD RUN /app/processor/python.py /app/jobs &&\
    cp /.jobs/cron/* /etc/cron.d/ &&\
    mv /.jobs/job /app/ &&\
    rm -rf /app/jobs

ENV LOCKER ''
ENV CONSUL_HOST ''
ENV WHITELIST ''

ENTRYPOINT ["/app/start"]
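The two ONBUILD instructions only fire when another image is built FROM this one: the downstream build's `jobs/` directory is copied to `/app/jobs` and processed into cron entries and Kubernetes job specs. A minimal downstream Dockerfile is sketched below; nothing else is required as long as a `jobs/` directory with YAML job definitions sits in the build context.

```
# Sketch of a downstream image; the ONBUILD triggers above copy ./jobs into
# /app/jobs and run /app/processor/python.py over it during this build.
FROM sstarcher/job-runner:latest
```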
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Shane Starcher

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
TAG=$(shell git rev-parse --abbrev-ref HEAD)


.jobs:
	./processor/python.py example-jobs

clean:
	rm -rf .jobs

build:
	docker build -t sstarcher/job-runner:${TAG} .

deploy:
	docker push sstarcher/job-runner:${TAG}

all: clean .jobs

.PHONY: clean all
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# The Concept
**Stitch together a Docker job scheduler, distributed locking, and a task runner**

[![CircleCI](https://circleci.com/gh/sstarcher/job-runner.svg?style=svg)](https://circleci.com/gh/sstarcher/job-runner)
[![](https://imagelayers.io/badge/sstarcher/job-runner:latest.svg)](https://imagelayers.io/?images=sstarcher/job-runner:latest 'Get your own badge on imagelayers.io')
[![Docker Registry](https://img.shields.io/docker/pulls/sstarcher/job-runner.svg)](https://registry.hub.docker.com/u/sstarcher/job-runner)

This image stitches together cron for scheduling, Consul for distributed locking, and Kubernetes as the task runner for executing jobs.

Project: [https://github.com/sstarcher/job-runner](https://github.com/sstarcher/job-runner)

Docker image: [https://registry.hub.docker.com/u/sstarcher/job-runner/](https://registry.hub.docker.com/u/sstarcher/job-runner/)


* Job Scheduler: Cron
* Distributed Locking: Consul
* Task Runner: Kubernetes


### Run Methods
* Cron
  * If run with no command argument, the container starts in cron mode and runs each job on its cron schedule.
* Single Job
  * If a job name is specified, it runs that job once and prints the pod name.
  * Lockers are disabled in this mode.

### Deployment Methods

```
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: job-runner
spec:
  replicas: 1
  template:
    metadata:
      labels:
        name: job-runner
    spec:
      containers:
      - name: job-runner
        image: sstarcher/job-runner:latest
```


### Kubernetes Runner
* Set `KUBERNETES_MASTER` to your Kubernetes cluster URL, for example `http://127.0.0.1:8080`


### Lockers
* Consul
  * Set `CONSUL_HOST` to an address within your cluster - default `localhost`
  * Set `CONSUL_PORT` to the port your cluster is listening on - default `8500`


### Configuration
* Lockers are disabled by default
* Example job formats are in the `example-jobs` folder
* Job names must be unique
* example-jobs
  * [DEFAULTS.yaml](example-jobs/DEFAULTS.yaml)
  * [test.yaml](example-jobs/test.yaml)
* This project uses Docker ONBUILD, so any YAML files added under a `jobs` directory are copied in and processed at image build time:
1. Create a Dockerfile
```
FROM sstarcher/job-runner:latest
```
2. Create a folder called `jobs`
3. Create a job, for example:
```
Example:
  image: debian:jessie # This value overrides what is set in DEFAULTS.yaml
  Jobs:
  - Test:
      time: '* * * * *'
      command: echo $job
```
4. `docker build --pull -t jobs .`
5. `docker run jobs`
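The runner is configured entirely through environment variables (`KUBERNETES_MASTER`, `LOCKER`, `CONSUL_HOST`, `CONSUL_PORT`, `WHITELIST`). As a minimal sketch, the Deployment above can enable the Consul locker like this; the host and URL values below are placeholders, not defaults:

```
      containers:
      - name: job-runner
        image: sstarcher/job-runner:latest
        env:
        - name: KUBERNETES_MASTER   # URL of the Kubernetes API server
          value: http://127.0.0.1:8080
        - name: LOCKER              # enable the Consul locker; leave empty to disable locking
          value: consul
        - name: CONSUL_HOST         # Consul address reachable from the pod (placeholder)
          value: consul.example.internal
        - name: CONSUL_PORT
          value: "8500"
```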
--------------------------------------------------------------------------------
/circleci.yml:
--------------------------------------------------------------------------------
machine:
  environment:
    IMAGE: "${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
  services:
    - docker

test:
  override:
    - docker build -t ${CIRCLE_SHA1} .

deployment:
  master:
    branch: master
    commands:
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USERNAME -p $DOCKER_PWD
      - docker tag ${CIRCLE_SHA1} ${IMAGE}:latest
      - docker push ${IMAGE}:latest
  branches:
    branch: /v.*/
    commands:
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USERNAME -p $DOCKER_PWD
      - docker tag ${CIRCLE_SHA1} ${IMAGE}:$CIRCLE_BRANCH
      - docker push $IMAGE:$CIRCLE_BRANCH
--------------------------------------------------------------------------------
/example-jobs/DEFAULTS.yaml:
--------------------------------------------------------------------------------
# This file cannot contain jobs
# All of the top-level keys in this file are merged into each job as defaults
Configuration: # This block is merged into every generated Kubernetes manifest (node selectors, annotations)
  spec:
    nodeSelector:
      usage: global_default

command: echo $job
restart: 'no'
image: debian:jessie
--------------------------------------------------------------------------------
/example-jobs/other.yaml:
--------------------------------------------------------------------------------
---
Configuration: # This block is merged into every generated Kubernetes manifest (node selectors, annotations)
  spec:
    nodeSelector:
      usage: worker
  annotations:
    job-reaper.github.sstarcher.io/config: test


# Top-level keys are used for groupings
CronExamples: # Every value directly under this level is inherited by the jobs
  image: debian:jessie # This value overrides what is set in DEFAULTS.yaml
  mem_limit: 2000MB
  # The $job variable is substituted with the unique key of the job
  command: echo $job
  Jobs: # The actual jobs are always specified under a key named Jobs
  - EveryMinute:
      mem_limit: 2000MB
      time: '* * * * *'
  - SingleLine: '* * * * *' # If the job does not define a hash we assume this is the cron expression
  - MultipleValues:
      cpu_shares: 512 # Based on 1 core = 1024
      time: '* * * * *'
      command: echo $job $job # This value overrides what is set above in the root CronExamples
--------------------------------------------------------------------------------
/example-jobs/test.yaml:
--------------------------------------------------------------------------------
# This job inherits and uses the defaults specified in DEFAULTS.yaml
TestDefaults:
  Jobs:
  - DefaultTest:
      image: debian:latest
      time: '* * * * *'
      command: sleep 10

# This is just a secondary grouping
Failure:
  Jobs:
  - InvalidCommand:
      time: '* * * * *'
      command: |
        \n abc der -f \n
        abc
        defaults
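The example jobs above rely on a simple precedence rule - keys on an individual job override keys on its grouping, which override DEFAULTS.yaml - implemented by the `merge()` helper in `processor/python.py` (next file). A standalone sketch of that behaviour, with made-up dictionaries standing in for the parsed YAML:

```
#!/usr/bin/env python
# Illustration of the defaults -> grouping -> job precedence applied by the
# processor; the example values below are made up, not taken from the repo.


def merge(source, destination):
    """Recursively merge `source` into `destination`; source keys win."""
    for key, value in source.items():
        if isinstance(value, dict):
            merge(value, destination.setdefault(key, {}))
        else:
            destination[key] = value
    return destination


defaults = {'image': 'debian:jessie', 'command': 'echo $job'}   # DEFAULTS.yaml
grouping = {'image': 'debian:jessie', 'mem_limit': '2000MB'}    # e.g. CronExamples
job = {'time': '* * * * *', 'command': 'echo $job $job'}        # e.g. MultipleValues

effective = {}
merge(defaults, effective)   # lowest precedence
merge(grouping, effective)
merge(job, effective)        # highest precedence

# -> {'image': 'debian:jessie', 'command': 'echo $job $job',
#     'mem_limit': '2000MB', 'time': '* * * * *'}
print(effective)
```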
--------------------------------------------------------------------------------
/processor/python.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import yaml
import sys
from string import Template
import os
from os.path import isfile, join, splitext
import copy
import re
import chkcrontab_lib as check
import subprocess
import tempfile


def load(yaml_file):
    f = open(yaml_file)
    data = yaml.safe_load(f)
    f.close()
    return data


def validate_cron(file_path):
    whitelisted_users = None
    log = check.LogCounter()
    return check.check_crontab(file_path, log, whitelisted_users)


def convert_memlimit(value):
    # Convert a human readable limit such as '2000MB' into bytes
    if not isinstance(value, str):
        return value
    match = re.match(r"(\d+)[Mm][Bb]", value)
    if match:
        return int(match.group(1)) * pow(10, 6)

    return value


def convert_cpushares(value):
    return value


def substitute_all(values, string):
    if isinstance(string, str):
        template = Template(string)
        return template.safe_substitute(values)
    else:
        return string


def merge(source, destination):
    """
    Recursive map merge
    """
    for key, value in source.items():
        if isinstance(value, dict):
            # get node or create one
            node = destination.setdefault(key, {})
            merge(value, node)
        else:
            destination[key] = value

    return destination


def convert(file):
    command = ['kompose', '--suppress-warnings',
               '-f', file, 'convert', '--rc', '--stdout', '--yaml']
    try:
        res = subprocess.check_output(command, stderr=subprocess.STDOUT)
        return yaml.safe_load(res)
    except subprocess.CalledProcessError as err:
        print(file)
        with open(file, 'r') as fin:
            print(fin.read())
        raise err
    except yaml.scanner.ScannerError as err:
        print(' '.join(command))
        print(res)
        raise err


def compose(file_name, yaml_doc):
    user = 'root'

    if not os.path.exists('.jobs'):
        os.makedirs('.jobs')

    if not os.path.exists('.jobs/cron'):
        os.makedirs('.jobs/cron')

    if not os.path.exists('.jobs/job'):
        os.makedirs('.jobs/job')

    config = copy.deepcopy(global_config)
    merge(yaml_doc.pop("Configuration", {}), config)

    cron_file = ".jobs/cron/" + file_name.lower()
    cron = open(cron_file, 'w')
    cron.write('SHELL=/bin/sh\n')
    cron.write('PATH=/usr/local/sbin:/usr/local/bin'
               ':/sbin:/bin:/usr/sbin:/usr/bin\n')

    for grouping, data in yaml_doc.iteritems():
        defaults = copy.copy(global_defaults)
        jobs = data.pop('Jobs')
        defaults.update(data)

        for job in jobs:
            jobName, jobData = job.popitem()
            jobName = jobName.lower()
            if os.path.exists('.jobs/job/' + jobName + '.yaml'):
                print('A job of this name already exists {0}'.format(jobName))
                exit(2)

            dump = {'environment': {'job': jobName}}

            merge(defaults, dump)
            time = dump.pop('time', None)
            if isinstance(jobData, str):
                time = jobData
            else:
                merge(jobData, dump)
                time = dump.pop('time', time)

            for key, value in dump.iteritems():
                dump[key] = substitute_all(dump['environment'], value)

            if "mem_limit" in dump:
                dump["mem_limit"] = convert_memlimit(dump["mem_limit"])
                dump['environment']['mem_limit'] = str(dump['mem_limit'])

            # Write the job as a compose-style YAML document for kompose to convert
            stream = tempfile.NamedTemporaryFile()
            yaml.dump({jobName: dump}, stream, default_flow_style=False)
            pods = convert(stream.name)
            stream.close()

            # Rewrite each generated resource as a batch/v1 Job and apply overrides
            for pod in pods['items']:
                pod['kind'] = 'Job'
                pod['apiVersion'] = 'batch/v1'

                pod.pop('status')
                pod['spec'].pop('replicas')
                if 'spec' in config:
                    merge(config['spec'], pod['spec']['template']['spec'])
                if 'annotations' in config:
                    pod['metadata']['annotations'] = {}
                    merge(config['annotations'],
                          pod['metadata']['annotations'])

                for container in pod['spec']['template']['spec']['containers']:
                    container["resources"] = {"limits": {}}
                    # Always pull when the image tag is :latest
                    if container['image'].endswith(":latest"):
                        container['imagePullPolicy'] = 'Always'
                    if "mem_limit" in dump:
                        container["resources"]["limits"]['memory'] = str(dump["mem_limit"]/1000/1000) + "Mi"
                    if "cpu_shares" in dump:
                        container["resources"]["limits"]['cpu'] = str(dump["cpu_shares"]) + "m"

                stream = open(".jobs/job/" + jobName + ".yaml", 'w')
                yaml.dump(pod, stream, default_flow_style=False)
                stream.close()

            if time:
                text = "{0} {1} /app/runner {2}" \
                       " >> /var/log/cron.log 2>&1\n"
                cron.write(text.format(time, user, jobName))
    cron.write('# Cron needs the file to end with a newline\n')
    cron.close()

    validation_log = validate_cron(cron_file)
    if validation_log:
        print(validation_log)
        exit(9)


global global_defaults
global global_config

global_defaults = {}
global_config = {}

cmdargs = sys.argv[1]

default_file = "{0}/DEFAULTS.yaml".format(cmdargs)


if isfile(default_file):
    loaded_defaults = load(default_file)
    global_config = loaded_defaults.pop("Configuration", {})
    global_defaults = loaded_defaults

for f in os.listdir(cmdargs):
    path = join(cmdargs, f)
    if isfile(path) and (path.endswith(".yaml") or path.endswith(".yml")):
        yaml_doc = load(path)
        filename, file_extension = splitext(f)
        if filename != 'DEFAULTS':
            compose(filename, yaml_doc)
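For each job the processor therefore emits two artifacts: a crontab line of the form `* * * * * root /app/runner <job> >> /var/log/cron.log 2>&1` in `.jobs/cron/<file>`, and a manifest in `.jobs/job/<job>.yaml`. For the `EveryMinute` example above, the manifest looks roughly like the following abridged sketch; the exact structure depends on kompose's output:

```
apiVersion: batch/v1
kind: Job
metadata:
  name: everyminute
spec:
  template:
    metadata:
      labels:
        service: everyminute     # selector used by scripts/kubernetes_runner
    spec:
      nodeSelector:              # merged in from the Configuration block
        usage: worker
      containers:
      - name: everyminute
        image: debian:jessie
        env:
        - name: job
          value: everyminute
        resources:
          limits:
            memory: 2000Mi       # from mem_limit: 2000MB
```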
--------------------------------------------------------------------------------
/processor/requirements.txt:
--------------------------------------------------------------------------------
PyYAML
chkcrontab
--------------------------------------------------------------------------------
/scripts/kubernetes_runner:
--------------------------------------------------------------------------------
#!/bin/bash
# Set KUBERNETES_MASTER to the URL of your cluster, e.g. http://kubemaster:8080
export KUBERNETES_MASTER

if [[ -z $(kubectl get jobs --selector=service=${JOB} 2> /dev/null ) ]]; then
    OUTPUT=$(kubectl create -f /app/job/${JOB}.yaml); RESULT=$?
    rm -rf k8s_${JOB}

    if [ $RESULT -ne 0 ]; then
        EXIT_CODE=101
    fi
else
    EXIT_CODE=100
    echo "[${JOB}] - OVERRUN"
    if [[ "$FOLLOW" == *"true"* ]] ; then
        POD_NAME=$(kubectl get pods --selector=service=$JOB -o=jsonpath='{.items[*].metadata.name}')
        echo "kubectl logs -f $POD_NAME"
    fi
    return
fi

#Ensure the job is actually registered. Avoids a possible race condition
until kubectl get jobs --selector=service=${JOB} &> /dev/null
do
    sleep 0.1
done


if [[ "$FOLLOW" != *"true"* ]] ; then
    OUTPUT="Running"
    EXIT_CODE=0
else
    POD_NAME=$(kubectl get pods --selector=service=$JOB -o=jsonpath='{.items[*].metadata.name}')
    echo "kubectl logs -f $POD_NAME"
fi
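Because every generated Job carries a `service=<job name>` label, the same selectors this script uses also work for inspecting a job by hand from any machine with `kubectl` access; for example (the job name below is illustrative):

```
#!/bin/bash
# Inspect a job started by the runner; "everyminute" is an example job name.
JOB=everyminute

# List the Kubernetes Job object created for it
kubectl get jobs --selector=service=${JOB}

# Follow the logs of the pod backing the job (same lookup the runner scripts use)
POD_NAME=$(kubectl get pods --selector=service=${JOB} -o=jsonpath='{.items[*].metadata.name}')
kubectl logs -f "${POD_NAME}"
```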
--------------------------------------------------------------------------------
/scripts/runner:
--------------------------------------------------------------------------------
#!/bin/bash
set -a
JOB=${1,,}

source /etc/default/runner
if [ ! -f /app/job/${JOB}.yaml ]; then
    echo "The specified job does not exist [${JOB}]"
    exit 1
fi

#Ensure we have a lock to run
case "$LOCKER" in
    consul)
        export CONSUL_HOST; /app/lockers/cronsul $JOB true #Run this in a subshell unlike everything else
        LOCKED=$?
        ;;
    *)
        echo "[$JOB] Skipping lock check"
        LOCKED=0
esac

if [ $LOCKED -eq 0 ]; then
    #Run our commands
    START=$(date +"%Y-%m-%dT%H:%M:%SZ")
    echo "[$JOB] Running @ $START"
    . /app/kubernetes_runner

    if [ "$EXIT_CODE" != 0 ]; then
        echo "$JOB $OUTPUT $STATUS"
    fi
else
    echo "[$JOB] Locked via ${LOCKER}"
fi

case "$LOCKER" in
    consul)
        export CONSUL_HOST; /app/lockers/cronsul-cleanup > /dev/null
        ;;
    *)
esac
--------------------------------------------------------------------------------
/scripts/start:
--------------------------------------------------------------------------------
#!/bin/bash -e

env -u 'affinity:container' -u 'no_proxy' > /etc/default/runner

chmod 0644 /etc/cron.d/*

if [[ -z "$1" ]]; then
    echo 'Running cron jobs'
    /app/verify_kubectl || true

    if [ -n "$WHITELIST" ]; then
        pushd '/etc/cron.d/' > /dev/null
        for filename in *
        do
            FILE="${filename%.*}"
            if [[ $WHITELIST != *"$FILE"* ]]; then
                rm ${FILE}
            fi
        done
        popd > /dev/null
    fi

    dockerize -stdout /var/log/cron.log -poll cron -f
else
    export LOCKER=''
    export FOLLOW=false
    for VAR in "$@"
    do
        JOB=${VAR,,}
        echo "---"
        cat /app/job/${JOB}.yaml
    done
fi
--------------------------------------------------------------------------------
/scripts/verify_kubectl:
--------------------------------------------------------------------------------
#!/bin/bash
export KUBERNETES_MASTER

set -e

if [[ "$KUBERNETES_MASTER" == *"https"* || "$KUBERNETES_MASTER" == *"443"* ]]; then
    KUBE_TOKEN=$(