├── deimos ├── VERSION ├── err.py ├── path.py ├── timestamp.py ├── usage.py ├── _struct.py ├── sig.py ├── proto.py ├── argv.py ├── cleanup.py ├── logger.py ├── cgroups.py ├── cmd.py ├── flock.py ├── mesos.py ├── __init__.py ├── docker.py ├── containerizer │ ├── __init__.py │ └── docker.py ├── config.py ├── state.py └── containerizer_pb2.py ├── bin ├── run ├── deimos └── ship_it ├── .gitignore ├── Makefile ├── example.cfg ├── setup.py ├── proto ├── containerizer.proto └── mesos.proto ├── integration-test ├── test-suite └── deimos-test.py ├── README.rst └── LICENSE /deimos/VERSION: -------------------------------------------------------------------------------- 1 | 0.4.3 2 | -------------------------------------------------------------------------------- /deimos/err.py: -------------------------------------------------------------------------------- 1 | class Err(RuntimeError): 2 | pass 3 | -------------------------------------------------------------------------------- /bin/run: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | /opt/mesosphere/deimos/deimos "$@" 3 | -------------------------------------------------------------------------------- /deimos/path.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def me(): 6 | return os.path.abspath(sys.argv[0]) 7 | -------------------------------------------------------------------------------- /bin/deimos: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | bin="`dirname "$0"`" && dir="`dirname "$bin"`" && 3 | exec python -m deimos.__init__ "$@" 4 | # A convenience for development. Not installed or used in production. 5 | -------------------------------------------------------------------------------- /deimos/timestamp.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def iso(t=time.time()): 5 | ms = ("%0.03f" % (t % 1))[1:] 6 | iso = time.strftime("%FT%T", time.gmtime(t)) 7 | return iso + ms + "Z" 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | *.rpm 22 | *.deb 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | nosetests.xml 31 | 32 | # Translations 33 | *.mo 34 | 35 | # Mr Developer 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | 40 | # Packaging 41 | toor 42 | -------------------------------------------------------------------------------- /deimos/usage.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import resource 3 | 4 | from deimos.logger import log 5 | from deimos._struct import _Struct 6 | 7 | 8 | def report(level=logging.DEBUG): 9 | self(level) 10 | children(level) 11 | 12 | 13 | def self(level=logging.DEBUG): 14 | log.log(level, rusage(resource.RUSAGE_SELF)) 15 | 16 | 17 | def children(level=logging.DEBUG): 18 | log.log(level, rusage(resource.RUSAGE_CHILDREN)) 19 | 20 | 21 | def rusage(target=resource.RUSAGE_SELF): 22 | r = resource.getrusage(target) 23 | fmt = "rss = %0.03fM user = %0.03f sys = %0.03f" 
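    # NB: On Linux, getrusage() reports ru_maxrss in kilobytes, so dividing
    # by 1024 below yields megabytes (on OS X the field is in bytes).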
24 | return fmt % (r.ru_maxrss / 1024.0, r.ru_utime, r.ru_stime) 25 | -------------------------------------------------------------------------------- /deimos/_struct.py: -------------------------------------------------------------------------------- 1 | class _Struct(object): 2 | 3 | def __init__(self, **properties): 4 | self.__dict__.update(properties) 5 | self._properties = properties.keys() 6 | 7 | def __repr__(self): 8 | mod, cls = self.__class__.__module__, self.__class__.__name__ 9 | fields = ["%s=%r" % (k, v) for k, v in self.items()] 10 | return mod + "." + cls + "(" + ", ".join(fields) + ")" 11 | 12 | def keys(self): 13 | return self._properties 14 | 15 | def items(self, onlyset=False): 16 | vals = [(k, self.__dict__[k]) for k in self._properties] 17 | return [(k, v) for k, v in vals if v] if onlyset else vals 18 | 19 | def merge(self, other): 20 | # NB: Use leftmost constructor, to recheck validity of fields. 21 | return self.__class__(**dict(self.items() + other.items())) 22 | -------------------------------------------------------------------------------- /deimos/sig.py: -------------------------------------------------------------------------------- 1 | import os 2 | import signal 3 | 4 | import deimos.logger 5 | 6 | 7 | def is_signal_name(s): 8 | return s.startswith("SIG") and not s.startswith("SIG_") 9 | 10 | names = dict((getattr(signal, s), s) for s in dir(signal) if is_signal_name(s)) 11 | 12 | 13 | def install(f, signals=[signal.SIGINT, signal.SIGTERM]): 14 | log = deimos.logger.logger(2) 15 | 16 | def handler(signum, _): 17 | log.warning("%s (%d)", names.get(signum, "SIG???"), signum) 18 | response = f(signum) 19 | if type(response) == Resume: 20 | return 21 | if type(response) is int: 22 | os._exit(response) 23 | os._exit(-signum) 24 | for _ in signals: 25 | signal.signal(_, handler) 26 | 27 | 28 | class Resume(object): 29 | 30 | def __eq__(self, other): 31 | return self.__class__ == other.__class__ 32 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | tmp := /tmp/deimos 2 | prefix := usr/local 3 | PKG_REL := 0.1.$(shell date -u +'%Y%m%d%H%M%S') 4 | 5 | .PHONY: proto 6 | proto: proto/mesos.proto 7 | protoc --proto_path=proto/ --python_out=deimos/ proto/*.proto 8 | 9 | .PHONY: pep8 10 | pep8: 11 | pep8 --exclude=*_pb2.py --ignore E127 deimos | tee pep8.txt | head -n8 12 | 13 | .PHONY: deb 14 | deb: clean freeze 15 | fpm -C toor -t deb -s dir \ 16 | -n deimos -v `cat deimos/VERSION` --iteration $(PKG_REL) . 17 | 18 | .PHONY: rpm 19 | rpm: clean freeze 20 | fpm -C toor -t rpm -s dir \ 21 | -n deimos -v `cat deimos/VERSION` --iteration $(PKG_REL) . 22 | 23 | # You will have to install bbfreeze to create a package `pip install bbfreeze` 24 | # Prep: 25 | # - sudo python setup.py develop 26 | .PHONY: freeze 27 | freeze: 28 | mkdir -p toor/$(prefix)/bin 29 | mkdir -p toor/opt/mesosphere/deimos 30 | cp bin/run toor/$(prefix)/bin/deimos 31 | cp -R . 
$(tmp) 32 | cd $(tmp) && sudo python setup.py bdist_bbfreeze 33 | # Fix for ubuntu using directories for eggs instead of zips 34 | sudo chmod a+r $(tmp)/dist/*/protobuf*/EGG-INFO/* || : 35 | sudo cp -R $(tmp)/dist/*/* toor/opt/mesosphere/deimos 36 | 37 | .PHONY: clean 38 | clean: 39 | rm -rf toor 40 | rm -rf dist 41 | rm -rf build 42 | sudo rm -rf $(tmp) 43 | 44 | .PHONY: prep-ubuntu 45 | prep-ubuntu: 46 | sudo apt-get install ruby-dev python-pip python-dev libz-dev protobuf-compiler 47 | sudo gem install fpm 48 | sudo pip install bbfreeze 49 | -------------------------------------------------------------------------------- /deimos/proto.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import sys 3 | 4 | import google.protobuf 5 | 6 | from deimos.err import Err 7 | from deimos.logger import log 8 | 9 | 10 | class recordio(): # Really just a namespace 11 | 12 | """ 13 | Read and write length-prefixed Protobufs on the STDIO streams. 14 | """ 15 | @staticmethod 16 | def read(cls): 17 | unpacked = struct.unpack('I', sys.stdin.read(4)) 18 | size = unpacked[0] 19 | if size <= 0: 20 | raise Err("Expected non-zero size for Protobuf") 21 | data = sys.stdin.read(size) 22 | if len(data) != size: 23 | raise Err("Expected %d bytes; received %d", size, len(data)) 24 | return deserialize(cls, data) 25 | 26 | @staticmethod 27 | def write(cls, **properties): 28 | data = serialize(cls, **properties) 29 | sys.stdout.write(struct.pack('I', len(data))) 30 | sys.stdout.write(data) 31 | pass 32 | 33 | @staticmethod 34 | def writeProto(proto): 35 | data = proto.SerializeToString() 36 | sys.stdout.write(struct.pack('I', len(data))) 37 | sys.stdout.write(data) 38 | pass 39 | 40 | 41 | def serialize(cls, **properties): 42 | """ 43 | With a Protobuf class and properties as keyword arguments, sets all the 44 | properties on a new instance of the class and serializes the resulting 45 | value. 46 | """ 47 | obj = cls() 48 | for k, v in properties.iteritems(): 49 | log.debug("%s.%s = %r", cls.__name__, k, v) 50 | setattr(obj, k, v) 51 | return obj.SerializeToString() 52 | 53 | 54 | def deserialize(cls, data): 55 | obj = cls() 56 | obj.ParseFromString(data) 57 | for line in lines(obj): 58 | log.debug(line) 59 | return obj 60 | 61 | 62 | def lines(proto): 63 | s = google.protobuf.text_format.MessageToString(proto) 64 | return s.strip().split("\n") 65 | -------------------------------------------------------------------------------- /example.cfg: -------------------------------------------------------------------------------- 1 | [docker] 2 | # These are options passed to Docker *before* the call to run/images/&c. This 3 | # allows you to set the Docker host. Note that any key:value pair whatever can 4 | # be set here. If the syntax of a key is such that it resembles a JSON list, 5 | # the option will be passed multiple times, once for each item in the list. 6 | host: ["unix:///var/run/docker.sock", "tcp://localhost:2375"] 7 | 8 | [docker.index] 9 | account_libmesos: libmesos 10 | #account: theteam 11 | index: interior-node:2222 12 | #dockercfg: /mnt/secrets/dockercfg 13 | 14 | [containers.options] 15 | # This array of options is passed to 'run' if there are none in the TaskInfo. 16 | # Example: [ "-e", "SOME_KEY=SOME_VALUE" ] 17 | default: [] 18 | # These options are passed to 'run' in addition to those in the TaskInfo. 19 | append: [] 20 | # This flag causes all container options in the TaskInfo to be ignored. 
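# (When true, containers are launched with only the default/append options
# configured in this file.)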
21 | ignore: false 22 | 23 | [hooks] 24 | # These options specify what programs to run before a container is launched 25 | # and after one is destroyed (killed) 26 | # They are passed the following environment variables 27 | # TASK_INFO, PORT, PORTS, PORT0 -- as well as anything specified 28 | # in the Environment section of the Marathon app. 29 | # Example: onlaunch: [ "container_registry", "register" ] 30 | onlaunch: [] 31 | # Example: ondestroy: [ "container_registry", "unregister", "--now" ] 32 | ondestroy: [] 33 | 34 | [containers.image] 35 | # The image to use when none is specified in the TaskInfo. 36 | default: docker:///ubuntu 37 | # Causes Deimos to ignore the container image specified in the TaskInfo. 38 | ignore: false 39 | 40 | [uris] 41 | # When false, Deimos will leave Tar and Zip archives as-is after download. 42 | unpack: True 43 | 44 | [log] 45 | # Enable syslog and set its log level. 46 | syslog: INFO 47 | # Enable console logging and set its log level. 48 | console: DEBUG 49 | 50 | [state] 51 | root: /tmp/deimos 52 | -------------------------------------------------------------------------------- /bin/ship_it: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset -o pipefail 3 | function -h { 4 | cat < 6 | 7 | Send Deimos to the target host. 8 | 9 | USAGE 10 | }; function --help { -h ;} # A nice way to handle -h and --help 11 | export LC_ALL=en_US.UTF-8 # A locale that works consistently 12 | 13 | function main { 14 | ship_it "$@" 15 | } 16 | 17 | function globals { 18 | export LC_ALL=en_US.UTF-8 # A locale that works consistently 19 | export LANG="$LC_ALL" 20 | }; globals 21 | 22 | function ship_it { 23 | setup.py sdist 24 | for host in "$@" 25 | do 26 | send "$host" 27 | remote "$host" --sudo -- reinstall_deimos /tmp/"$(egg)" 28 | done 29 | } 30 | 31 | function send { 32 | local host="$1" 33 | rsync -avz dist/"$(egg)" "$host":/tmp/ 34 | } 35 | 36 | function egg {( 37 | cd dist 38 | local eggs=( deimos-*.*.*.tar.gz ) 39 | out "${eggs[${#eggs[@]} - 1]}" # Choose the last by ASCIIbet 40 | )} 41 | 42 | function reinstall_deimos { 43 | ! pip_installed deimos || pip uninstall -y deimos 44 | easy_install "$1" 45 | } 46 | 47 | function pip_installed { 48 | pip show "$1" | fgrep -qx "Name: $1" 49 | } 50 | 51 | # Used like this: remote -- * 52 | function remote { 53 | local ssh=( -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ) 54 | local shell=( bash ) 55 | while [[ ${1:+isset} ]] 56 | do 57 | case "$1" in 58 | --sudo) shell=( sudo bash ) ; shift ;; 59 | --) shift ; break ;; 60 | *) ssh=( "${ssh[@]}" "$1" ) ; shift ;; 61 | esac 62 | done 63 | serialized "$@" | ssh "${ssh[@]}" "${shell[@]}" 64 | } 65 | 66 | # Set up the actor on the remote end and then send it a message. 67 | function serialized { 68 | declare -f 69 | echo set -o errexit -o nounset -o pipefail 70 | echo -n 'globals &&' 71 | printf ' %q' "$@" ; echo 72 | } 73 | 74 | function msg { out "$*" >&2 ;} 75 | function err { local x=$? ; msg "$*" ; return $(( $x == 0 ? 
1 : $x )) ;} 76 | function out { printf '%s\n' "$*" ;} 77 | 78 | if [[ ${1:-} ]] && declare -F | cut -d' ' -f3 | fgrep -qx -- "${1:-}" 79 | then "$@" 80 | else main "$@" 81 | fi 82 | 83 | -------------------------------------------------------------------------------- /deimos/argv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | def argv(*args, **opts): 5 | """ 6 | Produces an argument vector from its array of arguments and keyword 7 | options. First, the options are unpacked. When the value is a ``bool``, 8 | the option is passed without an argument if it is true and skipped if it 9 | is ``false``. When the option is one of the flat built-in types -- a 10 | ``string`` or ``unicode`` or ``bytes`` or an ``int`` or a ``long`` or a 11 | ``float`` -- it is passed literally. If the value is a subclass of 12 | ``dict``, ``.items()`` is called on it the option is passed multiple times 13 | for key-value pair, with the key and value joined by an ``=``. Otherwise, 14 | if the value is iterable, the option is passed once for each element, and 15 | each element is treated like an atomic type. Underscores in the names of 16 | options are turned in to dashes. If the name of an option is a single 17 | letter, only a single dash is used when passing it. If an option is passed 18 | with the key ``__`` and value ``True``, it is put at the end of the 19 | argument list. The arguments are appended to the end of the argument list, 20 | each on treated as an atomic type. 21 | 22 | >>> argv.argv(1, 2, 'a', u'ü', dev='/dev/cba', v=True, y=[3,2]) 23 | ['-y', '3', '-y', '2', '--dev', '/dev/cba', '-v', '1', '2', 'a', u'\xfc'] 24 | 25 | """ 26 | spacer = ["--"] if opts.get("__") else [] 27 | args = [arg(_) for _ in args] 28 | opts = [_ for k, v in opts.items() for _ in opt(k, v)] 29 | return opts + spacer + args 30 | 31 | 32 | def arg(v): 33 | if type(v) in strings: 34 | return v 35 | if type(v) in nums: 36 | return str(v) 37 | raise TypeError("Type %s is not a simple, flat type" % type(v)) 38 | 39 | 40 | def opt(k, v): 41 | k = arg(k).replace("_", "-") 42 | if k == "--": 43 | return [] 44 | k = ("--" if len(k) > 1 else "-") + k 45 | if type(v) is bool: 46 | return [k] if v else [] 47 | if type(v) in simple: 48 | return [k, arg(v)] 49 | if isinstance(v, dict): 50 | v = ["%s=%s" % (arg(kk), arg(vv)) for kk, vv in v.items()] 51 | return [_ for element in v for _ in [k, arg(element)]] 52 | 53 | 54 | nums = set([int, long, float]) 55 | strings = set([str, unicode, bytes]) 56 | simple = strings | nums 57 | -------------------------------------------------------------------------------- /deimos/cleanup.py: -------------------------------------------------------------------------------- 1 | from fcntl import LOCK_EX, LOCK_NB 2 | import glob 3 | import os 4 | import subprocess 5 | import time 6 | 7 | from deimos.cmd import Run 8 | import deimos.flock 9 | from deimos.logger import log 10 | from deimos.timestamp import iso 11 | from deimos._struct import _Struct 12 | 13 | 14 | class Cleanup(_Struct): 15 | 16 | def __init__(self, root="/tmp/deimos", optimistic=False): 17 | _Struct.__init__(self, root=root, 18 | optimistic=optimistic, 19 | lock=os.path.join(root, "cleanup")) 20 | 21 | def dirs(self, before=time.time(), exited=True): 22 | """ 23 | Provider a generator of container state directories. 24 | 25 | If exited is None, all are returned. If it is False, unexited 26 | containers are returned. If it is True, only exited containers are 27 | returned. 
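        Directories are named for their ISO 8601 start times, so they are
        selected by comparing names lexicographically against iso(before).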
28 | """ 29 | timestamp = iso(before) 30 | root = os.path.join(self.root, "start-time") 31 | os.chdir(root) 32 | by_t = (d for d in glob.iglob("????-??-??T*.*Z") if d < timestamp) 33 | if exited is None: 34 | def predicate(directory): 35 | return True 36 | else: 37 | def predicate(directory): 38 | exit = os.path.join(directory, "exit") 39 | return os.path.exists(exit) is exited 40 | return (os.path.join(root, d) for d in by_t if predicate(d)) 41 | 42 | def remove(self, *args, **kwargs): 43 | errors = 0 44 | lk = deimos.flock.LK(self.lock, LOCK_EX | LOCK_NB) 45 | try: 46 | lk.lock() 47 | except deimos.flock.Err: 48 | msg = "Lock unavailable -- is cleanup already running?" 49 | if self.optimistic: 50 | log.info(msg) 51 | return 0 52 | else: 53 | log.error(msg) 54 | raise e 55 | try: 56 | for d in self.dirs(*args, **kwargs): 57 | state = deimos.state.state(d) 58 | if state is None: 59 | log.warning("Not able to load state from: %s", d) 60 | continue 61 | try: 62 | cmd = ["rm", "-rf", d + "/"] 63 | cmd += [state._mesos()] 64 | if state.cid() is not None: 65 | cmd += [state._docker()] 66 | Run()(cmd) 67 | except subprocess.CalledProcessError: 68 | errors += 1 69 | finally: 70 | lk.unlock() 71 | if errors != 0: 72 | log.error("There were failures on %d directories", errors) 73 | return 4 74 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import subprocess 5 | import sys 6 | 7 | version = "deimos/VERSION" 8 | 9 | def check_output(*popenargs, **kwargs): 10 | process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) 11 | output, unused_err = process.communicate() 12 | retcode = process.poll() 13 | if retcode: 14 | cmd = kwargs.get("args") 15 | if cmd is None: 16 | cmd = popenargs[0] 17 | error = subprocess.CalledProcessError(retcode, cmd) 18 | error.output = output 19 | raise error 20 | return output 21 | 22 | def sync_version(): 23 | code = "git describe --tags --exact-match 2>/dev/null" 24 | try: 25 | try: subprocess.check_output 26 | except: subprocess.check_output = check_output 27 | v = subprocess.check_output(code, shell=True) 28 | with open(version, "w+") as h: 29 | h.write(v) 30 | except subprocess.CalledProcessError as e: 31 | print >>sys.stderr, "Not able to determine version from Git; skipping." 
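        # The deimos/VERSION file already in the tree is then used as-is by
        # read_version() below.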
32 | 33 | def read_version(): 34 | with open(version) as h: 35 | return h.read().strip() 36 | 37 | sync_version() 38 | 39 | setup(name = "deimos", 40 | license = "Apache", 41 | version = read_version(), 42 | install_requires = ["protobuf"], 43 | extras_require = { "s3": [ "awscli" ] }, 44 | description = "Mesos containerization hooks for Docker", 45 | author = "Jason Dusek", 46 | author_email = "jason.dusek@gmail.com", 47 | maintainer = "Mesosphere", 48 | maintainer_email = "support@mesosphere.io", 49 | url = "https://github.com/mesosphere/deimos", 50 | packages = [ "deimos", "deimos.containerizer" ], 51 | package_data = { "deimos": ["VERSION"] }, 52 | entry_points = { "console_scripts": ["deimos = deimos:cli"] }, 53 | classifiers = [ "Environment :: Console", 54 | "Intended Audience :: Developers", 55 | "Operating System :: Unix", 56 | "Operating System :: POSIX", 57 | "Programming Language :: Python", 58 | "Topic :: System", 59 | "Topic :: System :: Systems Administration", 60 | "Topic :: Software Development", 61 | "License :: OSI Approved :: Apache Software License", 62 | "Development Status :: 4 - Beta" ]) 63 | -------------------------------------------------------------------------------- /deimos/logger.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import logging 3 | import logging.handlers 4 | import os 5 | 6 | 7 | root = logging.getLogger("deimos") 8 | 9 | 10 | class log(): # Really just a namespace 11 | 12 | @staticmethod 13 | def debug(*args, **opts): 14 | logger(2).debug(*args, **opts) 15 | 16 | @staticmethod 17 | def info(*args, **opts): 18 | logger(2).info(*args, **opts) 19 | 20 | @staticmethod 21 | def warning(*args, **opts): 22 | logger(2).warning(*args, **opts) 23 | 24 | @staticmethod 25 | def error(*args, **opts): 26 | logger(2).error(*args, **opts) 27 | 28 | @staticmethod 29 | def critical(*args, **opts): 30 | logger(2).critical(*args, **opts) 31 | 32 | @staticmethod 33 | def exception(*args, **opts): 34 | logger(2).exception(*args, **opts) 35 | 36 | @staticmethod 37 | def log(*args, **opts): 38 | logger(2).log(*args, **opts) 39 | 40 | 41 | def initialize(console=logging.DEBUG, syslog=logging.INFO): 42 | global _settings 43 | global _initialized 44 | if _initialized: 45 | return 46 | _settings = locals() 47 | _initialized = True 48 | root.setLevel(min(level for level in [console, syslog] if level)) 49 | if console: 50 | stderr = logging.StreamHandler() 51 | fmt = "%(asctime)s.%(msecs)03d %(name)s %(message)s" 52 | stderr.setFormatter(logging.Formatter(fmt=fmt, datefmt="%H:%M:%S")) 53 | stderr.setLevel(console) 54 | root.addHandler(stderr) 55 | if syslog: 56 | dev = "/dev/log" if os.path.exists("/dev/log") else "/var/run/syslog" 57 | fmt = "deimos[%(process)d]: %(name)s %(message)s" 58 | logger = logging.handlers.SysLogHandler(address=dev) 59 | logger.setFormatter(logging.Formatter(fmt=fmt)) 60 | logger.setLevel(syslog) 61 | root.addHandler(logger) 62 | root.removeHandler(_null_handler) 63 | 64 | 65 | def logger(height=1): # http://stackoverflow.com/a/900404/48251 66 | """ 67 | Obtain a function logger for the calling function. Uses the inspect module 68 | to find the name of the calling function and its position in the module 69 | hierarchy. With the optional height argument, logs for caller's caller, and 70 | so forth. 
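    The log wrapper class above passes height=2 so that messages are
    attributed to the function that called, say, log.debug(), rather than to
    the wrapper method itself.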
71 | """ 72 | caller = inspect.stack()[height] 73 | scope = caller[0].f_globals 74 | function = caller[3] 75 | path = scope["__name__"] 76 | if path == "__main__" and scope["__package__"]: 77 | path = scope["__package__"] 78 | return logging.getLogger(path + "." + function + "()") 79 | 80 | _initialized = False 81 | 82 | _settings = {} 83 | 84 | try: 85 | _null_handler = logging.NullHandler() 86 | except: 87 | # Python 2.6 compatibility 88 | class NullHandler(logging.Handler): 89 | def emit(self, record): 90 | pass 91 | _null_handler = NullHandler() 92 | 93 | root.addHandler(_null_handler) 94 | -------------------------------------------------------------------------------- /deimos/cgroups.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from deimos.logger import log 4 | from deimos._struct import _Struct 5 | 6 | 7 | class CGroups(_Struct): 8 | 9 | "Holder for a container's cgroups hierarchy." 10 | 11 | def __init__(self, **cgroups_path_mapping): 12 | properties = {} 13 | for k, v in cgroups_path_mapping.items(): 14 | properties[k] = construct(v, k) 15 | _Struct.__init__(self, **properties) 16 | log.debug(" ".join(self.keys())) 17 | 18 | 19 | class CGroup(object): 20 | 21 | "A generic CGroup, allowing lookup of CGroup values as Python attributes." 22 | 23 | def __init__(self, path, name): 24 | self.path = path 25 | self.name = name 26 | 27 | def __getattr__(self, key): 28 | path = self.path + "/" + self.name + "." + key 29 | try: 30 | with open(path) as h: 31 | data = h.read() 32 | return data 33 | except OSError as e: 34 | if e.errno != errno.ENOENT: 35 | raise e 36 | log.warning("Could not read %s.%s (%s)", self.name, key, path) 37 | return None 38 | 39 | def stat_data(self): 40 | return StatFile(self.stat) 41 | 42 | 43 | def construct(path, name=None): 44 | "Selects an appropriate CGroup subclass for the given CGroup path." 45 | name = name if name else path.split("/")[4] 46 | classes = {"memory": Memory, 47 | "cpu": CPU, 48 | "cpuacct": CPUAcct} 49 | constructor = classes.get(name, CGroup) 50 | log.debug("Chose %s for: %s", constructor.__name__, path) 51 | return constructor(path, name) 52 | 53 | 54 | class Memory(CGroup): 55 | 56 | def rss(self): 57 | return int(self.stat_data().rss) 58 | 59 | def limit(self): 60 | return int(self.limit_in_bytes) 61 | 62 | 63 | class CPU(CGroup): 64 | 65 | def limit(self): 66 | return float(self.shares) / 1024 67 | # The scale factor must be the same as for the Docker module. This 68 | # scale factor is the same as the Docker tools use by default. When a 69 | # task is started without any explicit CPU limit, the limit that shows 70 | # up in CGroups is 1024. 71 | 72 | 73 | class CPUAcct(CGroup): 74 | 75 | def user_time(self): 76 | "Total user time for container in seconds." 77 | return float(self.stat_data().user) / 100 78 | 79 | def system_time(self): 80 | "Total system time for container in seconds." 
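        # cpuacct.stat counts ticks in USER_HZ units (typically 100 per
        # second), hence the division by 100 here and in user_time().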
81 | return float(self.stat_data().system) / 100 82 | 83 | 84 | class StatFile(_Struct): 85 | 86 | def __init__(self, data): 87 | kvs = [line.strip().split(" ") for line in data.strip().split("\n")] 88 | res = {} 89 | for kvs in kvs: 90 | if len(kvs) != 2: # Silently skip lines that aren't pairs 91 | continue 92 | k, v = kvs 93 | res[k] = v 94 | _Struct.__init__(self, **res) 95 | -------------------------------------------------------------------------------- /proto/containerizer.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | import "mesos.proto"; 20 | 21 | package mesos.containerizer; 22 | 23 | option java_package = "org.apache.mesos.containerizer"; 24 | option java_outer_classname = "Protos"; 25 | 26 | 27 | /** 28 | * Encodes the launch command sent to the external containerizer 29 | * program. 30 | */ 31 | message Launch { 32 | required ContainerID container_id = 1; 33 | optional TaskInfo task_info = 2; 34 | optional ExecutorInfo executor_info = 3; 35 | optional string directory = 4; 36 | optional string user = 5; 37 | optional SlaveID slave_id = 6; 38 | optional string slave_pid = 7; 39 | optional bool checkpoint = 8; 40 | } 41 | 42 | 43 | /** 44 | * Encodes the update command sent to the external containerizer 45 | * program. 46 | */ 47 | message Update { 48 | required ContainerID container_id = 1; 49 | repeated Resource resources = 2; 50 | } 51 | 52 | 53 | /** 54 | * Encodes the wait command sent to the external containerizer 55 | * program. 56 | */ 57 | message Wait { 58 | required ContainerID container_id = 1; 59 | } 60 | 61 | 62 | /** 63 | * Encodes the destroy command sent to the external containerizer 64 | * program. 65 | */ 66 | message Destroy { 67 | required ContainerID container_id = 1; 68 | } 69 | 70 | 71 | /** 72 | * Encodes the usage command sent to the external containerizer 73 | * program. 74 | */ 75 | message Usage { 76 | required ContainerID container_id = 1; 77 | } 78 | 79 | 80 | /** 81 | * Information about a container termination, returned by the 82 | * containerizer to the slave. 83 | */ 84 | message Termination { 85 | // A container may be killed if it exceeds its resources; this will 86 | // be indicated by killed=true and described by the message string. 87 | required bool killed = 1; 88 | required string message = 2; 89 | 90 | // Exit status of the process. 91 | optional int32 status = 3; 92 | } 93 | 94 | 95 | /** 96 | * Information on all active containers returned by the containerizer 97 | * to the slave. 
98 | */ 99 | message Containers { 100 | repeated ContainerID containers = 1; 101 | } 102 | -------------------------------------------------------------------------------- /deimos/cmd.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pipes 4 | import subprocess 5 | import sys 6 | 7 | import deimos.logger 8 | from deimos.err import * 9 | from deimos._struct import _Struct 10 | 11 | 12 | class Run(_Struct): 13 | 14 | def __init__(self, log=None, data=False, in_sh=True, 15 | close_stdin=True, log_stderr=True, 16 | start_level=logging.DEBUG, 17 | success_level=logging.DEBUG, 18 | error_level=logging.WARNING): 19 | _Struct.__init__(self, log=(log if log else deimos.logger.logger(2)), 20 | data=data, 21 | in_sh=in_sh, 22 | close_stdin=close_stdin, 23 | log_stderr=log_stderr, 24 | start_level=start_level, 25 | success_level=success_level, 26 | error_level=error_level) 27 | 28 | def __call__(self, argv, *args, **opts): 29 | out, err = None, None 30 | if "stdout" not in opts: 31 | opts["stdout"] = subprocess.PIPE if self.data else None 32 | if "stderr" not in opts: 33 | opts["stderr"] = subprocess.PIPE if self.log_stderr else None 34 | try: 35 | self.log.log(self.start_level, present(argv)) 36 | argv_ = in_sh(argv, not self.data) if self.in_sh else argv 37 | with open(os.devnull) as devnull: 38 | if self.close_stdin and "stdin" not in opts: 39 | opts["stdin"] = devnull 40 | p = subprocess.Popen(argv_, *args, **opts) 41 | out, err = p.communicate() 42 | code = p.wait() 43 | if code == 0: 44 | self.log.log(self.success_level, present(argv, 0)) 45 | if out is not None: 46 | self.log.log(self.success_level, "STDOUT // " + out) 47 | return out 48 | except subprocess.CalledProcessError as e: 49 | code = e.returncode 50 | self.log.log(self.error_level, present(argv, code)) 51 | if err is not None: 52 | self.log.log(self.error_level, "STDERR // " + err) 53 | raise subprocess.CalledProcessError(code, argv) 54 | 55 | 56 | def present(argv, token=None): 57 | if isinstance(token, basestring): 58 | return "%s // %s" % (token, escape(argv)) 59 | if isinstance(token, int): 60 | return "exit %d // %s" % (token, escape(argv)) 61 | return "call // %s" % escape(argv) 62 | 63 | 64 | def escape(argv): 65 | # NB: The pipes.quote() function is deprecated in Python 3 66 | return " ".join(pipes.quote(_) for _ in argv) 67 | 68 | 69 | def in_sh(argv, allstderr=True): 70 | """ 71 | Provides better error messages in case of file not found or permission 72 | denied. Note that this has nothing at all to do with shell=True, since 73 | quoting prevents the shell from interpreting any arguments -- they are 74 | passed straight on to shell exec. 75 | """ 76 | # NB: The use of single and double quotes in constructing the call really 77 | # matters. 78 | call = 'exec "$@" >&2' if allstderr else 'exec "$@"' 79 | return ["/bin/sh", "-c", call, "sh"] + argv 80 | -------------------------------------------------------------------------------- /integration-test/test-suite: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset -o pipefail 3 | function -h { 4 | cat < 6 | test-suite short 7 | test-suite long 8 | 9 | Run a suite of integration tests. By default, the short suite of tests is 10 | run. 11 | 12 | USAGE 13 | }; function --help { -h ;} # A nice way to handle -h and --help 14 | export LC_ALL=en_US.UTF-8 # A locale that works consistently 15 | 16 | function main { 17 | [[ ! 
${PTYHONPATH+isset} ]] || preamble >&2 18 | short "$@" 19 | } 20 | 21 | function globals { 22 | this="$(dirname "$0")" 23 | }; globals 24 | 25 | function preamble { 26 | cat <<\EOF 27 | You may need to set some environment variables to point to your Mesos build: 28 | 29 | # The distribute and proto eggs aren't strictly necessary. 30 | PYTHONPATH=/path/to/mesos/build/src/python/dist/mesos-*.egg 31 | 32 | You probably won't need to set MESOS_NATIVE_LIBRARY. 33 | EOF 34 | } 35 | 36 | function short { 37 | harness 60 short_tests "$@" 38 | } 39 | 40 | function long { 41 | harness 300 long_tests "$@" 42 | } 43 | 44 | function harness { 45 | local timeout="$1" ; shift 46 | ( cd "$this" && "$@" ) & 47 | local worker_process=$! 48 | trap "killtree $worker_process" TERM INT 49 | local token=/tmp/"$(printf deimos-test-suite.%04x.%04x $RANDOM $RANDOM)" 50 | ( trap 'exit 0' TERM 51 | sleep "$timeout" 52 | touch "$token" 53 | killtree "$worker_process" ) &>/dev/null & 54 | local term_process=$! 55 | trap "killtree $worker_process $term_process || true ; rm -f $token" TERM INT 56 | if wait "$worker_process" 57 | then 58 | msg "**** SUCCESS" 59 | else 60 | local code=$? 61 | [[ -e "$token" ]] && msg "**** TIMEOUT (${timeout}s)" || msg "**** FAILURE" 62 | fi 63 | killtree "$term_process" || true 64 | rm -f "$token" 65 | return "${code:-0}" 66 | } 67 | 68 | function test_ { 69 | local master="$1" test="$2" ; shift 2 70 | ./deimos-test.py --master "$master" --test "$test" "$@" 71 | } 72 | 73 | function test_sleep { 74 | test_ "$1" sleep --test.trials "${3:-2}" --test.sleep "${2:-2}" 75 | } 76 | 77 | function test_executor { 78 | test_ "$1" executor --test.trials "${2:-2}" 79 | } 80 | 81 | function test_pg { 82 | test_ "$1" pg --test.trials "${2:-2}" 83 | } 84 | 85 | function short_tests { 86 | test_sleep "$1" 87 | test_pg "$1" 88 | test_executor "$1" 89 | } 90 | 91 | function long_tests { 92 | short_tests "$1" 93 | test_sleep "$1" 10 10 94 | test_executor "$1" 5 95 | } 96 | 97 | function killtree { 98 | if [[ $# -gt 1 ]] 99 | then 100 | for arg in "$@" 101 | do killtree "$arg" || true 102 | done 103 | return 0 104 | fi 105 | kill -STOP "$1" &>/dev/null 106 | ps -e -o pid= -o ppid= | while read -r pid ppid 107 | do 108 | [[ $ppid = $1 ]] || continue 109 | killtree "$pid" || true 110 | done 111 | kill -CONT "$1" &>/dev/null 112 | kill -TERM "$1" &>/dev/null 113 | } 114 | 115 | function msg { out "$*" >&2 ;} 116 | function err { local x=$? ; msg "$*" ; return $(( $x == 0 ? 
1 : $x )) ;} 117 | function out { printf '%s\n' "$*" ;} 118 | 119 | ######################### Delegates to subcommands or runs main, as appropriate 120 | if [[ ${1:-} ]] && declare -F | cut -d' ' -f3 | fgrep -qx -- "${1:-}" 121 | then "$@" 122 | else main "$@" 123 | fi 124 | 125 | -------------------------------------------------------------------------------- /deimos/flock.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import errno 3 | import fcntl 4 | import os 5 | import signal 6 | import subprocess 7 | import time 8 | 9 | import deimos.err 10 | from deimos.logger import log 11 | from deimos._struct import _Struct 12 | 13 | 14 | locks = {} 15 | 16 | 17 | class LK(_Struct): 18 | default_timeout = 10 19 | 20 | def __new__(cls, path, flags, seconds=default_timeout): 21 | if os.path.abspath(path) in locks: 22 | return locks[path] 23 | else: 24 | return super(LK, cls).__new__(cls, path, flags, seconds) 25 | 26 | def __init__(self, path, flags, seconds=default_timeout): 27 | """Construct a lockable file handle. Handles are recycled. 28 | 29 | If seconds is 0, LOCK_NB will be set. If LOCK_NB is set, seconds will 30 | be set to 0. If seconds is None, there will be no timeout; but flags 31 | will not be adjusted in any way. 32 | """ 33 | full = os.path.abspath(path) 34 | flags, seconds = nb_seconds(flags, seconds) 35 | if full not in locks: 36 | _Struct.__init__(self, path=full, 37 | handle=None, 38 | fd=None, 39 | flags=flags, 40 | seconds=seconds) 41 | locks[self.path] = self 42 | 43 | def lock(self): 44 | if self.handle is None or self.handle.closed: 45 | self.handle = open(self.path, "w+") 46 | self.fd = self.handle.fileno() 47 | if (self.flags & fcntl.LOCK_NB) != 0 or self.seconds is None: 48 | try: 49 | fcntl.flock(self.handle, self.flags) 50 | except IOError as e: 51 | if e.errno not in [errno.EACCES, errno.EAGAIN]: 52 | raise e 53 | raise Locked(self.path) 54 | else: 55 | with timeout(self.seconds): 56 | try: 57 | fcntl.flock(self.handle, self.flags) 58 | except IOError as e: 59 | errnos = [errno.EINTR, errno.EACCES, errno.EAGAIN] 60 | if e.errno not in errnos: 61 | raise e 62 | raise Timeout(self.path) 63 | 64 | def unlock(self): 65 | if not self.handle.closed: 66 | fcntl.flock(self.handle, fcntl.LOCK_UN) 67 | self.handle.close() 68 | 69 | 70 | def format_lock_flags(flags): 71 | tokens = [("EX", fcntl.LOCK_EX), ("SH", fcntl.LOCK_SH), 72 | ("UN", fcntl.LOCK_UN), ("NB", fcntl.LOCK_NB)] 73 | return "|".join(s for s, flag in tokens if (flags & flag) != 0) 74 | 75 | 76 | def nb_seconds(flags, seconds): 77 | if seconds == 0: 78 | flags |= fcntl.LOCK_NB 79 | if (flags & fcntl.LOCK_NB) != 0: 80 | seconds = 0 81 | return flags, seconds 82 | 83 | 84 | class Err(deimos.err.Err): 85 | pass 86 | 87 | 88 | class Timeout(Err): 89 | pass 90 | 91 | 92 | class Locked(Err): 93 | pass 94 | 95 | 96 | def lock_browser(directory): 97 | bash = """ 98 | set -o errexit -o nounset -o pipefail 99 | 100 | function files_by_inode { 101 | find "$1" -type f -printf '%i %p\\n' | LC_ALL=C LANG=C sort 102 | } 103 | 104 | function locking_pids_by_inode { 105 | cat /proc/locks | 106 | sed -r ' 107 | s/^.+ ([^ ]+) +([0-9]+) [^ :]+:[^ :]+:([0-9]+) .+$/\\3 \\2 \\1/ 108 | ' | LC_ALL=C LANG=C sort 109 | } 110 | 111 | join <(locking_pids_by_inode) <(files_by_inode "$1") 112 | """ 113 | subprocess.check_call(["bash", "-c", bash, "bash", 114 | os.path.abspath(directory)]) 115 | 116 | # Thanks to Glenn Maynard 117 | # 
http://stackoverflow.com/questions/5255220/fcntl-flock-how-to-implement-a-timeout/5255473#5255473 118 | 119 | 120 | @contextmanager 121 | def timeout(seconds): 122 | def timeout_handler(signum, frame): 123 | pass 124 | original_handler = signal.signal(signal.SIGALRM, timeout_handler) 125 | try: 126 | signal.alarm(seconds) 127 | yield 128 | finally: 129 | signal.alarm(0) 130 | signal.signal(signal.SIGALRM, original_handler) 131 | -------------------------------------------------------------------------------- /deimos/mesos.py: -------------------------------------------------------------------------------- 1 | from deimos._struct import _Struct 2 | 3 | 4 | class Launch(_Struct): 5 | 6 | def __init__(self, proto): 7 | underlying = LaunchProto(proto) 8 | self._underlying = underlying 9 | _Struct.__init__(self, executor_id=underlying.executor_id(), 10 | container_id=underlying.container_id(), 11 | container=underlying.container(), 12 | argv=underlying.argv(), 13 | env=underlying.env(), 14 | uris=underlying.uris(), 15 | ports=underlying.ports(), 16 | cpu_and_mem=underlying.cpu_and_mem(), 17 | directory=underlying.directory(), 18 | user=underlying.user(), 19 | needs_observer=underlying.needs_observer()) 20 | 21 | 22 | class LaunchProto(object): 23 | 24 | """Wraps launch proto to simplify handling of format variations 25 | 26 | For example, the resources can be in either the task_info or the 27 | executor_info. 28 | """ 29 | 30 | def __init__(self, proto): 31 | self.proto = proto 32 | 33 | def executor(self): 34 | if self.proto.HasField("task_info"): 35 | return None 36 | if self.proto.HasField("executor_info"): 37 | return self.proto.executor_info 38 | if self.proto.task_info.HasField("executor"): 39 | return self.proto.task_info.executor 40 | 41 | def command(self): 42 | if self.executor() is not None: 43 | return self.executor().command 44 | else: 45 | return self.proto.task_info.command 46 | 47 | def container(self): 48 | if self.command().HasField("container"): 49 | container = self.command().container 50 | return container.image, list(container.options) 51 | return "docker:///", [] 52 | 53 | def resources(self): 54 | # NB: We only want the executor resources when there is no task. 55 | if self.proto.HasField("task_info"): 56 | return self.proto.task_info.resources 57 | else: 58 | return self.executor().resources 59 | 60 | def executor_id(self): 61 | if self.executor() is not None: 62 | return self.executor().executor_id.value 63 | else: 64 | return self.proto.task_info.task_id.value 65 | 66 | def container_id(self): 67 | return self.proto.container_id.value 68 | 69 | def cpu_and_mem(self): 70 | cpu, mem = None, None 71 | for r in self.resources(): 72 | if r.name == "cpus": 73 | cpu = str(int(r.scalar.value * 1024)) 74 | if r.name == "mem": 75 | mem = str(int(r.scalar.value)) + "m" 76 | return (cpu, mem) 77 | 78 | def env(self): 79 | cmd = self.command() 80 | self.env = [(_.name, _.value) for _ in cmd.environment.variables] 81 | # Add task_info.name to the environment variables 82 | self.env += [("TASK_INFO", self.proto.task_info.name)] 83 | return self.env 84 | 85 | def ports(self): 86 | resources = [_.ranges.range for _ in self.resources() 87 | if _.name == 'ports'] 88 | ranges = [_ for __ in resources for _ in __] 89 | # NB: Casting long() to int() so there's no trailing 'L' in later 90 | # stringifications. Ports should only ever be shorts, anyways. 
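        # E.g. a resource range with begin=31000 and end=31002 expands to
        # [31000, 31001, 31002]; Mesos ranges are inclusive, hence the "+ 1".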
91 | ports = [range(int(_.begin), int(_.end) + 1) for _ in ranges] 92 | return [port for r in ports for port in r] 93 | 94 | def argv(self): 95 | cmd = self.command() 96 | if cmd.HasField("value") and cmd.value != "": 97 | return ["sh", "-c", cmd.value] 98 | return [] 99 | 100 | def uris(self): 101 | return list(self.command().uris) 102 | 103 | def needs_observer(self): 104 | return self.executor() is None 105 | 106 | def user(self): 107 | if self.proto.HasField("user"): 108 | return self.proto.user 109 | 110 | def directory(self): 111 | if self.proto.HasField("directory"): 112 | return self.proto.directory 113 | -------------------------------------------------------------------------------- /deimos/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import calendar 3 | import os 4 | import signal 5 | import subprocess 6 | import sys 7 | import time 8 | 9 | import deimos.cleanup 10 | import deimos.config 11 | import deimos.containerizer 12 | import deimos.containerizer.docker 13 | from deimos.err import Err 14 | import deimos.flock 15 | from deimos.logger import log 16 | import deimos.sig 17 | import deimos.usage 18 | 19 | 20 | def cli(argv=None): 21 | deimos.sig.install(lambda _: None) 22 | if argv is None: 23 | argv = sys.argv 24 | sub = argv[1] if len(argv) > 1 else None 25 | 26 | if sub in ["-h", "--help", "help"]: 27 | print format_help() 28 | return 0 29 | 30 | conf = deimos.config.load_configuration() 31 | 32 | if sub == "config": 33 | log.info("Final configuration:") 34 | for _, conf in conf.items(): 35 | print "%r" % conf 36 | return 0 37 | 38 | if sub == "locks": 39 | deimos.flock.lock_browser(os.path.join(conf.state.root, "mesos")) 40 | return 0 41 | 42 | if sub == "state": 43 | cleanup = deimos.cleanup.Cleanup(conf.state.root) 44 | t, rm = time.time(), False 45 | for arg in argv[2:]: 46 | if arg == "--rm": 47 | rm = True 48 | continue 49 | t = calendar.timegm(time.strptime(arg, "%Y-%m-%dT%H:%M:%SZ")) 50 | if rm: 51 | return cleanup.remove(t) 52 | else: 53 | for d in cleanup.dirs(t): 54 | sys.stdout.write(d + "\n") 55 | return 0 56 | 57 | if sub not in deimos.containerizer.methods(): 58 | print >>sys.stderr, format_help() 59 | print >>sys.stderr, "** Please specify a subcommand **".center(79) 60 | log.error("Bad ARGV: %r" % argv[1:]) 61 | return 1 62 | 63 | deimos.docker.options = conf.docker.argv() 64 | containerizer = deimos.containerizer.docker.Docker( 65 | container_settings=conf.containers, 66 | index_settings=conf.index, 67 | optimistic_unpack=conf.uris.unpack, 68 | hooks=conf.hooks, 69 | state_root=conf.state.root 70 | ) 71 | 72 | deimos.usage.report() 73 | try: 74 | result = deimos.containerizer.stdio(containerizer, *argv[1:]) 75 | deimos.usage.report() 76 | if result is not None: 77 | if isinstance(result, bool): 78 | return 0 if result else 1 79 | if isinstance(result, int): 80 | return result 81 | if isinstance(result, str): 82 | sys.stdout.write(result) 83 | else: 84 | for item in result: 85 | sys.stdout.write(str(item) + "\n") 86 | except Err as e: 87 | log.error("%s.%s: %s", type(e).__module__, type(e).__name__, str(e)) 88 | return 4 89 | except subprocess.CalledProcessError as e: 90 | log.error(str(e)) 91 | return 4 92 | except Exception: 93 | log.exception("Unhandled failure in %s", sub) 94 | return 8 95 | return 0 96 | 97 | 98 | def format_help(): 99 | return """ 100 | USAGE: deimos launch (--no-fork)? 
101 | deimos usage 102 | deimos destroy 103 | deimos wait 104 | deimos observe 105 | deimos locks 106 | deimos state 107 | 108 | Deimos provides Mesos integration for Docker, allowing Docker to be used as 109 | an external containerizer. 110 | 111 | deimos launch (--no-fork)? 112 | 113 | Launches a container and runs the executor or command specified in the 114 | TaskInfo, passed in on standard in. 115 | 116 | The launch subcommand always watches the launched container and logs changes 117 | in its lifecycle. By default, it forks off a child to do the watching, as 118 | part of the contract external containerizers have with Mesos. With 119 | --no-fork, launch will watch the container and log in the foreground. This 120 | can be helpful during debugging. 121 | 122 | deimos usage 123 | 124 | Generates a protobuf description of the resources used by the container. 125 | 126 | deimos destroy 127 | 128 | Shuts down the specified container. 129 | 130 | deimos wait 131 | 132 | Reads STDIN to find the container to watch. 133 | 134 | deimos observe 135 | 136 | Observes the Mesos container ID, in a way that blocks all calls to `wait`. 137 | It is for internal use...probably don't want to play with this one. 138 | 139 | deimos locks 140 | 141 | List file locks taken by Deimos, associating each file with a PID, an inode, 142 | and a lock level. The same file may appear multiple times. 143 | 144 | deimos state (--rm)? 145 | 146 | List stale state directories (those with an exit file). With --rm, removes 147 | stale states. 148 | 149 | deimos config 150 | 151 | Load and display the configuration. 152 | 153 | """.strip("\n") 154 | 155 | if __name__ == "__main__": 156 | sys.exit(cli(sys.argv)) 157 | -------------------------------------------------------------------------------- /deimos/docker.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import itertools 3 | import json 4 | import logging 5 | import os 6 | import re 7 | import subprocess 8 | import sys 9 | import time 10 | 11 | from deimos.cmd import Run 12 | from deimos.err import * 13 | from deimos.logger import log 14 | from deimos._struct import _Struct 15 | 16 | 17 | def run(options, image, command=[], env={}, cpus=None, mems=None, ports=[]): 18 | envs = env.items() if isinstance(env, dict) else env 19 | pairs = [("-e", "%s=%s" % (k, v)) for k, v in envs] 20 | if ports != []: # NB: Forces external call to pre-fetch image 21 | port_pairings = list(itertools.izip_longest(ports, inner_ports(image))) 22 | log.info("Port pairings (Mesos, Docker) // %r", port_pairings) 23 | for allocated, target in port_pairings: 24 | if allocated is None: 25 | log.warning("Container exposes more ports than were allocated") 26 | break 27 | options += ["-p", "%d:%d" % (allocated, target or allocated)] 28 | argv = ["run"] + options 29 | argv += ["-c", str(cpus)] if cpus else [] 30 | argv += ["-m", str(mems)] if mems else [] 31 | argv += [_ for __ in pairs for _ in __] # This is just flatten 32 | argv += [image] + command 33 | return docker(*argv) 34 | 35 | 36 | def stop(ident): 37 | return docker("stop", "-t=2", ident) 38 | 39 | 40 | def rm(ident): 41 | return docker("rm", ident) 42 | 43 | 44 | def wait(ident): 45 | return docker("wait", ident) 46 | 47 | 48 | images = {} # Cache of image information 49 | 50 | 51 | def pull(image): 52 | Run(data=True)(docker("pull", image)) 53 | refresh_docker_image_info(image) 54 | 55 | 56 | def pull_once(image): 57 | if image_info(image) is None: 58 | pull(image) 59 | 60 | 61 | def 
image_info(image): 62 | if image in images: 63 | return images[image] 64 | else: 65 | return refresh_docker_image_info(image) 66 | 67 | 68 | def refresh_docker_image_info(image): 69 | try: 70 | text = Run(data=True)(docker("inspect", image)) 71 | parsed = json.loads(text)[0] 72 | images[image] = parsed 73 | return parsed 74 | except subprocess.CalledProcessError as e: 75 | return None 76 | 77 | 78 | def ensure_image(f): 79 | def f_(image, *args, **kwargs): 80 | pull_once(image) 81 | return f(image, *args, **kwargs) 82 | return f_ 83 | 84 | 85 | @ensure_image 86 | def inner_ports(image): 87 | info = image_info(image) 88 | config = info.get("Config", info.get("config")) 89 | if config: 90 | exposed = config.get("ExposedPorts", {}) 91 | if exposed and isinstance(exposed, dict): 92 | return sorted(int(k.split("/")[0]) for k in exposed.keys()) 93 | specs = config.get("PortSpecs", []) 94 | if specs and isinstance(specs, list): 95 | return sorted(int(v.split(":")[-1]) for v in specs) 96 | return [] # If all else fails... 97 | 98 | 99 | # System and process interfaces 100 | 101 | class Status(_Struct): 102 | 103 | def __init__(self, cid=None, pid=None, exit=None): 104 | _Struct.__init__(self, cid=cid, pid=pid, exit=exit) 105 | 106 | 107 | def cgroups(cid): 108 | paths = [] 109 | paths += glob.glob("/sys/fs/cgroup/*/" + cid) 110 | paths += glob.glob("/sys/fs/cgroup/*/docker/" + cid) 111 | paths += glob.glob("/cgroup/*/" + cid) 112 | paths += glob.glob("/cgroup/*/docker/" + cid) 113 | named_cgroups = [(s.split("/cgroup/")[1].split("/")[0], s) for s in paths] 114 | return dict(named_cgroups) 115 | 116 | 117 | def matching_image_for_host(distro=None, release=None, *args, **kwargs): 118 | if distro is None or release is None: 119 | # TODO: Use redhat-release, &c 120 | rel_string = Run(data=True)(["bash", "-c", """ 121 | set -o errexit -o nounset -o pipefail 122 | ( source /etc/os-release && tr A-Z a-z <<<"$ID\t$VERSION_ID" ) 123 | """]) 124 | probed_distro, probed_release = rel_string.strip().split() 125 | distro, release = (distro or probed_distro, release or probed_release) 126 | return image_token("%s:%s" % (distro, release), *args, **kwargs) 127 | 128 | 129 | def image_token(name, account=None, index=None, *args, **kwargs): 130 | return "/".join(_ for _ in [index, account, name] if _ is not None) 131 | 132 | 133 | def probe(ident, quiet=False): 134 | fields = "{{.ID}} {{.State.Pid}} {{.State.ExitCode}}" 135 | level = logging.DEBUG if quiet else logging.WARNING 136 | argv = docker("inspect", "--format=" + fields, ident) 137 | run = Run(data=True, error_level=level) 138 | text = run(argv).strip() 139 | cid, pid, exit = text.split() 140 | return Status(cid=cid, pid=pid, exit=(exit if pid == 0 else None)) 141 | 142 | 143 | def exists(ident, quiet=False): 144 | try: 145 | return probe(ident, quiet) 146 | except subprocess.CalledProcessError as e: 147 | if e.returncode != 1: 148 | raise e 149 | return None 150 | 151 | 152 | def await(ident, t=0.05, n=10): 153 | for _ in range(0, n): 154 | result = exists(ident, quiet=True) 155 | if result: 156 | return result 157 | time.sleep(t) 158 | result = exists(ident, quiet=True) 159 | if result: 160 | return result 161 | msg = "Container %s not ready after %d sleeps of %g seconds" 162 | log.warning(msg % (ident, n, t)) 163 | raise AwaitTimeout("Timed out waiting for %s" % ident) 164 | 165 | 166 | def read_wait_code(data): 167 | try: 168 | code = int(data) 169 | code = 128 + abs(code) if code < 0 else code 170 | return code % 256 171 | except: 172 | 
log.error("Result of `docker wait` wasn't an int: %r", data) 173 | return 111 174 | 175 | 176 | class AwaitTimeout(Err): 177 | pass 178 | 179 | 180 | # Global settings 181 | 182 | options = [] 183 | 184 | 185 | def docker(*args): 186 | return ["docker"] + options + list(args) 187 | -------------------------------------------------------------------------------- /deimos/containerizer/__init__.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import logging 3 | import os 4 | import re 5 | import subprocess 6 | 7 | try: # Prefer system installation of Mesos protos if available 8 | from mesos_pb2 import * 9 | from containerizer_pb2 import * 10 | except: 11 | from deimos.mesos_pb2 import * 12 | from deimos.containerizer_pb2 import * 13 | 14 | import deimos.cmd 15 | from deimos.logger import log 16 | from deimos.proto import recordio 17 | 18 | 19 | class Containerizer(object): 20 | 21 | def __init__(self): 22 | pass 23 | 24 | def launch(self, launch_pb, *args): 25 | pass 26 | 27 | def update(self, update_pb, *args): 28 | pass 29 | 30 | def usage(self, usage_pb, *args): 31 | pass 32 | 33 | def wait(self, wait_pb, *args): 34 | pass 35 | 36 | def destroy(self, destroy_pb, *args): 37 | pass 38 | 39 | def recover(self, *args): 40 | pass 41 | 42 | def containers(self, *args): 43 | pass 44 | 45 | def observe(self, *args): 46 | pass 47 | 48 | 49 | def methods(): 50 | "Names of operations provided by containerizers, as a set." 51 | pairs = inspect.getmembers(Containerizer, predicate=inspect.ismethod) 52 | return set(k for k, _ in pairs if k[0:1] != "_") 53 | 54 | # Not an instance method of containerizer because it shouldn't be overridden. 55 | 56 | 57 | def stdio(containerizer, *args): 58 | """Connect containerizer class to command line args and STDIN 59 | 60 | Dispatches to an appropriate containerizer method based on the first 61 | argument and parses the input using an appropriate Protobuf type. 62 | 63 | launch < containerizer::Launch 64 | update < containerizer::Update 65 | usage < containerizer::Usage > mesos::ResourceStatistics 66 | wait < containerizer::Wait > containerizer::Termination 67 | destroy < containerizer::Destroy 68 | containers > containerizer::Containers 69 | recover 70 | 71 | Output serialization must be handled by the containerizer method (it 72 | doesn't necessarily happen at the end). 
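    (deimos.proto.recordio provides write()/writeProto() helpers for emitting
    such responses.)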
73 | 74 | Not really part of the containerizer protocol but exposed by Deimos as a 75 | subcommand: 76 | 77 | # Follows a Docker ID, PID, &c and exits with an appropriate, matching 78 | # exit code, in a manner specific to the containerizer 79 | observe 80 | 81 | """ 82 | try: 83 | name = args[0] 84 | method, proto = {"launch": (containerizer.launch, Launch), 85 | "update": (containerizer.update, Update), 86 | "usage": (containerizer.usage, Usage), 87 | "wait": (containerizer.wait, Wait), 88 | "destroy": (containerizer.destroy, Destroy), 89 | "containers": (containerizer.containers, None), 90 | "recover": (containerizer.recover, None), 91 | "observe": (containerizer.observe, None)}[name] 92 | except IndexError: 93 | raise Err("Please choose a subcommand") 94 | except KeyError: 95 | raise Err("Subcommand %s is not valid for containerizers" % name) 96 | log.debug("%r", (method, proto)) 97 | if proto is not None: 98 | return method(recordio.read(proto), *args[1:]) 99 | else: 100 | return method(*args[1:]) 101 | 102 | 103 | # Mesos interface helpers 104 | 105 | MESOS_ESSENTIAL_ENV = ["MESOS_SLAVE_ID", "MESOS_SLAVE_PID", 106 | "MESOS_FRAMEWORK_ID", "MESOS_EXECUTOR_ID", 107 | "MESOS_CHECKPOINT", "MESOS_RECOVERY_TIMEOUT"] 108 | 109 | 110 | def mesos_env(): 111 | env = os.environ.get 112 | return [(k, env(k)) for k in MESOS_ESSENTIAL_ENV if env(k)] 113 | 114 | 115 | def log_mesos_env(level=logging.INFO): 116 | for k, v in os.environ.items(): 117 | if k.startswith("MESOS_") or k.startswith("LIBPROCESS_"): 118 | log.log(level, "%s=%s" % (k, v)) 119 | 120 | 121 | def mesos_directory(): 122 | if "MESOS_DIRECTORY" not in os.environ: 123 | return 124 | work_dir = os.path.abspath(os.getcwd()) 125 | task_dir = os.path.abspath(os.environ["MESOS_DIRECTORY"]) 126 | if task_dir != work_dir: 127 | log.info("Changing directory to MESOS_DIRECTORY=%s", task_dir) 128 | os.chdir(task_dir) 129 | 130 | 131 | def mesos_executor(): 132 | return os.path.join(os.environ["MESOS_LIBEXEC_DIRECTORY"], 133 | "mesos-executor") 134 | 135 | 136 | def mesos_default_image(): 137 | return os.environ.get("MESOS_DEFAULT_CONTAINER_IMAGE") 138 | 139 | 140 | def place_uris(launchy, directory, optimistic_unpack=False): 141 | cmd = deimos.cmd.Run() 142 | cmd(["mkdir", "-p", directory]) 143 | for item in launchy.uris: 144 | uri = item.value 145 | gen_unpack_cmd = unpacker(uri) if optimistic_unpack else None 146 | log.info("Retrieving URI: %s", deimos.cmd.escape([uri])) 147 | try: 148 | basename = uri.split("/")[-1] 149 | f = os.path.join(directory, basename) 150 | if basename == "": 151 | raise IndexError 152 | except IndexError: 153 | log.info("Not able to determine basename: %r", uri) 154 | continue 155 | try: 156 | cmd(fetcher_command(uri, f)) 157 | except subprocess.CalledProcessError as e: 158 | log.warning("Failed while processing URI: %s", 159 | deimos.cmd.escape([uri])) 160 | continue 161 | if item.executable: 162 | os.chmod(f, 0755) 163 | if gen_unpack_cmd is not None: 164 | log.info("Unpacking %s" % f) 165 | cmd(gen_unpack_cmd(f, directory)) 166 | cmd(["rm", "-f", f]) 167 | 168 | 169 | def fetcher_command(uri, target): 170 | if uri[0:5] == "s3://": 171 | return ["aws", "s3", "cp", uri, target] 172 | return ["curl", "-sSfL", uri, "--output", target] 173 | 174 | 175 | def unpacker(uri): 176 | if re.search(r"[.](t|tar[.])(bz2|xz|gz)$", uri): 177 | return lambda f, directory: ["tar", "-C", directory, "-xf", f] 178 | if re.search(r"[.]zip$", uri): 179 | return lambda f, directory: ["unzip", "-d", directory, f] 180 | 
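# Illustrative examples (not part of the original module):
#
#   unpacker("http://example.com/app.tar.gz") returns a function fn such that
#   fn("/sandbox/app.tar.gz", "/sandbox") is
#   ["tar", "-C", "/sandbox", "-xf", "/sandbox/app.tar.gz"]; for a plain file
#   like "app.jar" it returns None, so place_uris() leaves the download as-is.
#
#   fetcher_command("s3://bucket/key", "/sandbox/key") is
#   ["aws", "s3", "cp", "s3://bucket/key", "/sandbox/key"]; any other URI is
#   fetched with curl -sSfL.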
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | deimos 3 | ====== 4 | 5 | Deimos is a Docker plugin for Mesos, providing external containerization as 6 | described in `MESOS-816`_. 7 | 8 | NOTE: Mesos 0.20.0 shipped with built-in Docker support, and this project is no longer actively maintained. 9 | More information is available on the `Docker in Mesos 0.20.0`_ docs page. 10 | 11 | ------------ 12 | Installation 13 | ------------ 14 | 15 | For a complete installation walkthrough, see `this article`_. 16 | 17 | Deimos can be installed `from the Cheeseshop`_. 18 | 19 | .. code-block:: bash 20 | 21 | pip install deimos 22 | 23 | ---------------------------- 24 | Passing Parameters to Docker 25 | ---------------------------- 26 | 27 | In Mesos, every successful resource offer is ultimately followed up with a 28 | ``TaskInfo`` that describes the work to be done. Within the ``TaskInfo`` is a 29 | ``CommandInfo`` and within the ``CommandInfo`` there is a ``ContainerInfo`` 30 | (following `MESOS-816`_). The ``ContainerInfo`` structure allows specification 31 | of an *image URL* and *container options*. For example: 32 | 33 | .. code-block:: c 34 | 35 | { 36 | container = ContainerInfo { 37 | image = "docker:///ubuntu" 38 | options = ["-c", "10240"] 39 | } 40 | } 41 | 42 | Deimos handles image URLs beginning with ``docker:///`` by stripping the 43 | prefix and using the remainder as the image name. The container options are 44 | passed to ``docker run`` when the task is launched. If a ``//`` appears in the options list, every argument after it is appended to the end of the ``docker run`` command, which is useful for images that define an entry point. 45 | For example: 46 | 47 | .. code-block:: c 48 | 49 | { 50 | container = ContainerInfo { 51 | image = "docker:///flynn/slugrunner" 52 | options = ["//", "start", "web"] 53 | } 54 | } 55 | 56 | If no ``ContainerInfo`` is present in a task, Deimos will still containerize 57 | it, using the ``--default_container_image`` passed to the slave or taking 58 | a reasonable guess based on the host's distribution and release. 59 | 60 | Some options for Docker, like ``-H``, do not apply only to ``docker run``. 61 | These options should be set in the Deimos configuration file. 62 | 63 | Deimos recognizes Mesos resources that specify ports, CPUs, and memory and 64 | translates them to appropriate Docker options. 65 | 66 | 67 | ----------------------------------- 68 | Passing Parameters through Marathon 69 | ----------------------------------- 70 | 71 | Marathon provides a REST API that accepts JSON-formatted requests describing long-running commands. 72 | 73 | Deimos uses the following keys from this JSON object: 74 | 75 | * ``container`` A nested object describing the Docker image to run 76 | 77 | * ``image`` The Docker image to run; it may reference a custom registry but 78 | must have a version tag 79 | 80 | * ``options`` A list of extra options to add to the Docker invocation 81 | 82 | * ``cmd`` The command to run inside the image. Deimos 83 | automatically prepends ``/bin/sh -c`` 84 | 85 | * ``env`` Extra environment variables to pass to the container 86 | 87 | * ``cpus`` How many CPU shares to give to the container; may be fractional, 88 | and is multiplied by 1024 and passed with ``docker run -c`` 89 | 90 | * ``mem`` How much memory to give to the container, in megabytes 91 | 92 | ..
code-block:: bash 93 | 94 | curl -v -X POST http://mesos1.it.corp:8080/v2/apps \ 95 | -H Content-Type:application/json -d '{ 96 | "id": "marketing", 97 | "container": { 98 | "image": "docker:///registry.int/marketing:latest", 99 | "options": ["-v", "/srv:/srv"] 100 | }, 101 | "cmd": "/webapp/script/start.sh", 102 | "env": {"VAR":"VALUE"}, 103 | "cpus": 2, 104 | "mem": 768.0, 105 | "instances": 2 106 | }' 107 | 108 | This turns into a Docker execution line similar to this: 109 | 110 | .. code-block:: bash 111 | 112 | docker run --sig-proxy --rm \ 113 | --cidfile /tmp/deimos/mesos/10330424-95c2-4119-b2a5-df8e1d1eead9/cid \ 114 | -w /tmp/mesos-sandbox \ 115 | -v /tmp/deimos/mesos/10330424-95c2-4119-b2a5-df8e1d1eead9/fs:/tmp/mesos-sandbox \ 116 | -v /srv:/srv -p 31014:3000 \ 117 | -c 2048 -m 768m \ 118 | -e PORT=31014 -e PORT0=31014 -e PORTS=31014 -e VAR=VALUE \ 119 | registry.int/marketing:latest sh -c "/webapp/script/start.sh" 120 | 121 | 122 | ------- 123 | Logging 124 | ------- 125 | 126 | Deimos logs to the console when run interactively and to syslog when run in the 127 | background. You can configure logging explicitly in the Deimos configuration 128 | file. 129 | 130 | 131 | ------------- 132 | Configuration 133 | ------------- 134 | 135 | There is an example configuration file in ``example.cfg`` which documents all 136 | the configuration options. The two config sections that are likely to be most 137 | important in production are: 138 | 139 | * ``[docker]``: global Docker options (``--host``) 140 | 141 | * ``[log]``: logging settings 142 | 143 | Configuration files are searched in this order: 144 | 145 | .. code-block:: bash 146 | 147 | ./deimos.cfg 148 | ~/.deimos 149 | /etc/deimos.cfg 150 | /usr/etc/deimos.cfg 151 | /usr/local/etc/deimos.cfg 152 | 153 | Only one configuration file -- the first one found -- is loaded. To see what 154 | Deimos thinks its configuration is, run ``deimos config``. 155 | 156 | 157 | ------------------- 158 | The State Directory 159 | ------------------- 160 | 161 | Deimos creates a state directory for each container, by default under 162 | ``/tmp/deimos``, where it tracks the container's status, start time and PID. 163 | File locks are maintained for each container to coordinate invocations of 164 | Deimos that start, stop and probe the container. 165 | 166 | To clean up state directories belonging to exited containers, invoke Deimos 167 | as follows: 168 | 169 | .. code-block:: bash 170 | 171 | deimos state --rm 172 | 173 | This task can be run safely from Cron at a regular interval. In the future, 174 | Deimos will not require separate invocation of the ``state`` subcommand for 175 | regular operation. 176 | 177 | 178 | ------------------------------- 179 | Configuring Mesos To Use Deimos 180 | ------------------------------- 181 | 182 | Only the slave needs to be configured. Set these options: 183 | 184 | .. code-block:: bash 185 | 186 | --containerizer_path=/usr/local/bin/deimos --isolation=external 187 | 188 | The packaged version of Mesos can also load these options from files: 189 | 190 | .. code-block:: bash 191 | 192 | echo /usr/local/bin/deimos > /etc/mesos-slave/containerizer_path 193 | echo external > /etc/mesos-slave/isolation 194 | 195 | 196 | .. _`from the Cheeseshop`: https://pypi.python.org/pypi/deimos 197 | 198 | .. _MESOS-816: https://issues.apache.org/jira/browse/MESOS-816 199 | 200 | .. _`this article`: https://mesosphere.io/learn/run-docker-on-mesosphere/ 201 | 202 | .. 
_`Docker in Mesos 0.20.0`: http://mesos.apache.org/documentation/latest/docker-containerizer/ 203 | -------------------------------------------------------------------------------- /deimos/config.py: -------------------------------------------------------------------------------- 1 | from ConfigParser import SafeConfigParser, NoSectionError 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | 7 | import deimos.argv 8 | import deimos.docker 9 | from deimos.logger import log 10 | import deimos.logger 11 | from deimos._struct import _Struct 12 | 13 | 14 | def load_configuration(f=None, interactive=sys.stdout.isatty()): 15 | error = None 16 | defaults = _Struct(docker=Docker(), 17 | index=DockerIndex(), 18 | containers=Containers(), 19 | uris=URIs(), 20 | state=State(), 21 | hooks=Hooks(), 22 | log=Log( 23 | console=(logging.DEBUG if interactive else None), 24 | syslog=(logging.INFO if not interactive else None) 25 | )) 26 | parsed = None 27 | try: 28 | f = f if f else path() 29 | if f: 30 | parsed = parse(f) 31 | except Exception as e: 32 | error = e 33 | finally: 34 | confs = defaults.merge(parsed) if parsed else defaults 35 | deimos.logger.initialize(**dict(confs.log.items())) 36 | if error: 37 | pre = ("Error loading %s: " % f) if f else "" 38 | log.exception(pre + str(error)) 39 | sys.exit(16) 40 | if parsed: 41 | log.info("Loaded configuration from %s" % f) 42 | for _, conf in parsed.items(): 43 | log.debug("Found: %r", conf) 44 | return confs 45 | 46 | 47 | def coercearray(array): 48 | if type(array) in deimos.argv.strings: 49 | if array[0:1] != "[": 50 | return [array] 51 | try: 52 | arr = json.loads(array) 53 | if type(arr) is not list: 54 | raise ValueError() 55 | return arr 56 | except: 57 | raise ValueError("Not an array: %s" % array) 58 | return list(array) 59 | 60 | 61 | def coerceloglevel(level): 62 | if not level: 63 | return 64 | if type(level) is int: 65 | return level 66 | levels = {"DEBUG": logging.DEBUG, 67 | "INFO": logging.INFO, 68 | "WARNING": logging.WARNING, 69 | "ERROR": logging.ERROR, 70 | "CRITICAL": logging.CRITICAL, 71 | "NOTSET": logging.NOTSET} 72 | try: 73 | return levels[level] 74 | except: 75 | raise ValueError("Not a log level: %s" % level) 76 | 77 | 78 | def coercebool(b): 79 | if type(b) is bool: 80 | return b 81 | try: 82 | bl = json.loads(b) 83 | if type(bl) is not bool: 84 | raise ValueError() 85 | return bl 86 | except: 87 | raise ValueError("Not a bool: %s" % b) 88 | 89 | 90 | def coerceoption(val): 91 | try: 92 | return coercearray(val) 93 | except: 94 | return coercebool(val) 95 | 96 | 97 | class Image(_Struct): 98 | 99 | def __init__(self, default=None, ignore=False): 100 | _Struct.__init__(self, default=default, ignore=coercebool(ignore)) 101 | 102 | def override(self, image=None): 103 | return image if (image and not self.ignore) else self.default 104 | 105 | 106 | class Hooks(_Struct): 107 | 108 | def __init__(self, unpack=False, onlaunch=[], ondestroy=[]): 109 | _Struct.__init__(self, onlaunch=coercearray(onlaunch), 110 | ondestroy=coercearray(ondestroy)) 111 | 112 | def override(self, unpack=False, onlaunch=[], ondestroy=[]): 113 | onl = self.onlaunch.override(onlaunch) 114 | ond = self.ondestroy.override(ondestroy) 115 | return onl, ond 116 | 117 | 118 | class Options(_Struct): 119 | 120 | def __init__(self, default=[], append=[], ignore=False): 121 | _Struct.__init__(self, default=coercearray(default), 122 | append=coercearray(append), 123 | ignore=coercebool(ignore)) 124 | 125 | def override(self, options=[]): 126 | a = 
options if (len(options) > 0 and not self.ignore) else self.default 127 | return a + self.append 128 | 129 | 130 | class Containers(_Struct): 131 | 132 | def __init__(self, image=Image(), options=Options()): 133 | _Struct.__init__(self, image=image, options=options) 134 | 135 | def override(self, image=None, options=[]): 136 | return self.image.override(image), self.options.override(options) 137 | 138 | 139 | class URIs(_Struct): 140 | 141 | def __init__(self, unpack=True): 142 | _Struct.__init__(self, unpack=coercebool(unpack)) 143 | 144 | 145 | class Log(_Struct): 146 | 147 | def __init__(self, console=None, syslog=None): 148 | _Struct.__init__(self, console=coerceloglevel(console), 149 | syslog=coerceloglevel(syslog)) 150 | 151 | 152 | class Docker(_Struct): 153 | 154 | def __init__(self, **properties): 155 | for k in properties.keys(): 156 | properties[k] = coerceoption(properties[k]) 157 | _Struct.__init__(self, **properties) 158 | 159 | def argv(self): 160 | return deimos.argv.argv(**dict(self.items())) 161 | 162 | 163 | class DockerIndex(_Struct): 164 | 165 | def __init__(self, index=None, account_libmesos="libmesos", 166 | account=None, 167 | dockercfg=None): 168 | _Struct.__init__(self, index=index, 169 | account_libmesos=account_libmesos, 170 | account=account, 171 | dockercfg=dockercfg) 172 | 173 | 174 | class State(_Struct): 175 | 176 | def __init__(self, root="/tmp/deimos"): 177 | if ":" in root: 178 | raise ValueError("Deimos root storage path must not contain ':'") 179 | _Struct.__init__(self, root=root) 180 | 181 | 182 | def parse(f): 183 | config = SafeConfigParser() 184 | config.read(f) 185 | parsed = {} 186 | sections = [("log", Log), ("state", State), ("uris", URIs), 187 | ("docker", Docker), 188 | ("docker.index", DockerIndex), 189 | ("containers.image", Image), 190 | ("hooks", Hooks), 191 | ("containers.options", Options)] 192 | for key, cls in sections: 193 | try: 194 | parsed[key] = cls(**dict(config.items(key))) 195 | except: 196 | continue 197 | containers = {} 198 | if "containers.image" in parsed: 199 | containers["image"] = parsed["containers.image"] 200 | del parsed["containers.image"] 201 | if "containers.options" in parsed: 202 | containers["options"] = parsed["containers.options"] 203 | del parsed["containers.options"] 204 | if len(containers) > 0: 205 | parsed["containers"] = Containers(**containers) 206 | if "docker.index" in parsed: 207 | parsed["index"] = parsed["docker.index"] 208 | del parsed["docker.index"] 209 | return _Struct(**parsed) 210 | 211 | 212 | def path(): 213 | for p in search_path: 214 | if os.path.exists(p): 215 | return p 216 | 217 | search_path = ["./deimos.cfg", 218 | os.path.expanduser("~/.deimos"), 219 | "/etc/deimos.cfg", 220 | "/usr/etc/deimos.cfg", 221 | "/usr/local/etc/deimos.cfg"] 222 | -------------------------------------------------------------------------------- /deimos/state.py: -------------------------------------------------------------------------------- 1 | import errno 2 | from fcntl import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN 3 | import itertools 4 | import os 5 | import random 6 | import signal 7 | import time 8 | 9 | import deimos.docker 10 | from deimos.err import * 11 | from deimos.logger import log 12 | from deimos._struct import _Struct 13 | from deimos.timestamp import iso 14 | 15 | 16 | class State(_Struct): 17 | 18 | def __init__(self, root, docker_id=None, mesos_id=None, executor_id=None): 19 | _Struct.__init__(self, root=os.path.abspath(root), 20 | docker_id=docker_id, 21 | mesos_id=mesos_id, 22 | 
executor_id=executor_id, 23 | timestamp=None) 24 | 25 | def resolve(self, *args, **kwargs): 26 | if self.mesos_id is not None: 27 | return self._mesos(*args, **kwargs) 28 | else: 29 | return self._docker(*args, **kwargs) 30 | 31 | def mesos_container_id(self): 32 | if self.mesos_id is None: 33 | self.mesos_id = self._readf("mesos-container-id") 34 | return self.mesos_id 35 | 36 | def eid(self): 37 | if self.executor_id is None: 38 | self.executor_id = self._readf("eid") 39 | return self.executor_id 40 | 41 | def sandbox_symlink(self, value=None): 42 | p = self.resolve("fs") 43 | if value is not None: 44 | link(value, p) 45 | return p 46 | 47 | def pid(self, value=None): 48 | if value is not None: 49 | self._writef("pid", str(value)) 50 | data = self._readf("pid") 51 | if data is not None: 52 | return int(data) 53 | 54 | def cid(self, refresh=False): 55 | if self.docker_id is None or refresh: 56 | self.docker_id = self._readf("cid") 57 | return self.docker_id 58 | 59 | def t(self): 60 | if self.timestamp is None: 61 | self.timestamp = self._readf("t") 62 | return self.timestamp 63 | 64 | def await_cid(self, seconds=60): 65 | base = 0.05 66 | start = time.time() 67 | steps = [1.0, 1.25, 1.6, 2.0, 2.5, 3.2, 4.0, 5.0, 6.4, 8.0] 68 | scales = (10.0 ** n for n in itertools.count()) 69 | scaled = ([scale * step for step in steps] for scale in scales) 70 | sleeps = itertools.chain.from_iterable(scaled) 71 | log.info("Awaiting CID file: %s", self.resolve("cid")) 72 | while self.cid(refresh=True) in [None, ""]: 73 | time.sleep(next(sleeps)) 74 | if time.time() - start >= seconds: 75 | raise CIDTimeout("No CID file after %ds" % seconds) 76 | 77 | def await_launch(self): 78 | lk_l = self.lock("launch", LOCK_SH) 79 | self.ids(3) 80 | if self.cid() is None: 81 | lk_l.unlock() 82 | self.await_cid() 83 | lk_l = self.lock("launch", LOCK_SH) 84 | return lk_l 85 | 86 | def lock(self, name, flags, seconds=60): 87 | fmt_time = "indefinite" if seconds is None else "%ds" % seconds 88 | fmt_flags = deimos.flock.format_lock_flags(flags) 89 | flags, seconds = deimos.flock.nb_seconds(flags, seconds) 90 | log.info("request // %s %s (%s)", name, fmt_flags, fmt_time) 91 | p = self.resolve(os.path.join("lock", name), mkdir=True) 92 | lk = deimos.flock.LK(p, flags, seconds) 93 | try: 94 | lk.lock() 95 | except deimos.flock.Err: 96 | log.error("failure // %s %s (%s)", name, fmt_flags, fmt_time) 97 | raise 98 | if (flags & LOCK_EX) != 0: 99 | lk.handle.write(iso() + "\n") 100 | log.info("success // %s %s (%s)", name, fmt_flags, fmt_time) 101 | return lk 102 | 103 | def exit(self, value=None): 104 | if value is not None: 105 | self._writef("exit", str(value)) 106 | data = self._readf("exit") 107 | if data is not None: 108 | return deimos.docker.read_wait_code(data) 109 | 110 | def push(self): 111 | self._mkdir() 112 | properties = [("cid", self.docker_id), 113 | ("mesos-container-id", self.mesos_id), 114 | ("eid", self.executor_id)] 115 | self.set_start_time() 116 | for k, v in properties: 117 | if v is not None and not os.path.exists(self.resolve(k)): 118 | self._writef(k, v) 119 | if self.cid() is not None: 120 | docker = os.path.join(self.root, "docker", self.cid()) 121 | link("../mesos/" + self.mesos_id, docker) 122 | 123 | def set_start_time(self): 124 | if self.t() is not None: 125 | return 126 | d = os.path.abspath(os.path.join(self.root, "start-time")) 127 | create(d) 128 | start, t = time.time(), iso() 129 | while time.time() - start <= 1.0: 130 | try: 131 | p = os.path.join(d, t) 132 | os.symlink("../mesos/" + 
self.mesos_id, p) 133 | self._writef("t", t) 134 | self.timestamp = t 135 | return 136 | except OSError as e: 137 | if e.errno != errno.EEXIST: 138 | raise 139 | time.sleep(random.uniform(0.005, 0.025)) 140 | t = iso() 141 | 142 | def _mkdir(self): 143 | create(self._mesos()) 144 | 145 | def _readf(self, path): 146 | f = self.resolve(path) 147 | if os.path.exists(f): 148 | with open(f) as h: 149 | return h.read().strip() 150 | 151 | def _writef(self, path, value): 152 | f = self.resolve(path) 153 | with open(f, "w+") as h: 154 | h.write(value + "\n") 155 | h.flush() 156 | 157 | def _docker(self, path=None, mkdir=False): 158 | if path is None: 159 | p = os.path.join(self.root, "docker", self.docker_id) 160 | else: 161 | p = os.path.join(self.root, "docker", self.docker_id, path) 162 | p = os.path.abspath(p) 163 | if mkdir: 164 | docker = os.path.join(self.root, "docker", self.docker_id) 165 | if not os.path.exists(docker): 166 | log.error("No Docker symlink (this should be impossible)") 167 | raise Err("Bad Docker symlink state") 168 | create(os.path.dirname(p)) 169 | return p 170 | 171 | def _mesos(self, path=None, mkdir=False): 172 | if path is None: 173 | p = os.path.join(self.root, "mesos", self.mesos_id) 174 | else: 175 | p = os.path.join(self.root, "mesos", self.mesos_id, path) 176 | p = os.path.abspath(p) 177 | if mkdir: 178 | create(os.path.dirname(p)) 179 | return p 180 | 181 | def ids(self, height=2): 182 | log = deimos.logger.logger(height) 183 | if self.eid() is not None: 184 | log.info("eid = %s", self.eid()) 185 | if self.mesos_container_id() is not None: 186 | log.info("mesos = %s", self.mesos_container_id()) 187 | if self.cid() is not None: 188 | log.info("docker = %s", self.cid()) 189 | 190 | def exists(self): 191 | path = None 192 | if self.mesos_id is not None: 193 | path = os.path.join(self.root, "mesos", self.mesos_id) 194 | if self.docker_id is not None: 195 | path = os.path.join(self.root, "docker", self.docker_id) 196 | if path is not None: 197 | return os.path.exists(path) 198 | return False 199 | 200 | 201 | class CIDTimeout(Err): 202 | pass 203 | 204 | 205 | def create(path): 206 | if not os.path.exists(path): 207 | os.makedirs(path) 208 | 209 | 210 | def link(source, target): 211 | if not os.path.exists(target): 212 | create(os.path.dirname(target)) 213 | os.symlink(source, target) 214 | 215 | 216 | def state(directory): 217 | mesos = os.path.join(directory, "mesos-container-id") 218 | if os.path.exists(mesos): 219 | with open(mesos) as h: 220 | mesos_id = h.read().strip() 221 | root = os.path.dirname(os.path.dirname(os.path.realpath(directory))) 222 | return State(root=root, mesos_id=mesos_id) 223 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /integration-test/deimos-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import collections 4 | import os 5 | import logging 6 | import random 7 | import signal 8 | import sys 9 | import threading 10 | import time 11 | 12 | import google.protobuf as pb 13 | 14 | os.environ["GLOG_minloglevel"] = "3" # Set before mesos module is loaded 15 | import mesos 16 | import mesos_pb2 17 | 18 | 19 | #################################### Schedulers implement the integration tests 20 | 21 | class Scheduler(mesos.Scheduler): 22 | def __init__(self, trials=10): 23 | self.token = "%08x" % random.getrandbits(32) 24 | self.trials = trials 25 | self.tasks = [] 26 | self.statuses = {} 27 | self.log = log.getChild("scheduler") 28 | self.loggers = {} 29 | def __repr__(self): 30 | return "%s(%r)" % (self.__class__, self.__dict__) 31 | def registered(self, driver, framework_id, master): 32 | self.framework_id = framework_id 33 | self.log.info("Registered with ID:\n %s" % framework_id.value) 34 | def statusUpdate(self, driver, update): 35 | task, code = update.task_id.value, update.state 36 | if self.statuses.get(task, None) in Scheduler.terminal: 37 | self.loggers[task].info(present_status(update) + " (redundant)") 38 | else: 39 | self.loggers[task].info(present_status(update)) 40 | self.statuses[task] = code 41 | def all_tasks_done(self): 42 | agg = [_ for _ in self.statuses.values() if _ in Scheduler.terminal] 43 | return len(agg) >= self.trials 44 | def sum_up(self): 45 | sums = [ "%s=%d" % (k, v) for k, v in self.task_status_summary() ] 46 | log.info(" ".join(sums)) 47 | def task_status_summary(self): 48 | counts = collections.defaultdict(int) 49 | for task, code in self.statuses.items(): 50 | counts[code] += 1 51 | return [ (mesos_pb2.TaskState.Name(code), count) 52 | for code, count in counts.items() ] 53 | def next_task_id(self): 54 | short_id = "%s.task-%02d" % (self.token, len(self.tasks)) 55 | long_id = "deimos-test." 
+ short_id 56 | self.loggers[long_id] = log.getChild(short_id) 57 | return long_id 58 | terminal = set([ mesos_pb2.TASK_FINISHED, 59 | mesos_pb2.TASK_FAILED, 60 | mesos_pb2.TASK_KILLED, 61 | mesos_pb2.TASK_LOST ]) 62 | failed = set([ mesos_pb2.TASK_FAILED, 63 | mesos_pb2.TASK_KILLED, 64 | mesos_pb2.TASK_LOST ]) 65 | 66 | class SleepScheduler(Scheduler): 67 | wiki = "https://en.wikipedia.org/wiki/Main_Page" 68 | def __init__(self, sleep=10, uris=[wiki], container=None, trials=5): 69 | Scheduler.__init__(self, trials) 70 | self.sleep = sleep 71 | self.uris = uris 72 | self.container = container 73 | self.done = [] 74 | def statusUpdate(self, driver, update): 75 | super(type(self), self).statusUpdate(driver, update) 76 | if self.all_tasks_done(): 77 | self.sum_up() 78 | driver.stop() 79 | def resourceOffers(self, driver, offers): 80 | delay = int(float(self.sleep) / self.trials) 81 | for offer in offers: 82 | if len(self.tasks) >= self.trials: break 83 | # time.sleep(self.sleep + 0.5) 84 | time.sleep(delay) # Space out the requests a bit 85 | tid = self.next_task_id() 86 | sid = offer.slave_id 87 | cmd = "date -u +%T ; sleep " + str(self.sleep) + " ; date -u +%T" 88 | task = task_with_command(tid, sid, cmd, self.uris, self.container) 89 | self.tasks += [task] 90 | self.loggers[tid].info(present_task(task)) 91 | driver.launchTasks(offer.id, [task]) 92 | 93 | class PGScheduler(Scheduler): 94 | def __init__(self, sleep=10, 95 | container="docker:///zaiste/postgresql", 96 | trials=10): 97 | Scheduler.__init__(self, trials) 98 | self.container = container 99 | self.sleep = sleep 100 | def statusUpdate(self, driver, update): 101 | super(type(self), self).statusUpdate(driver, update) 102 | if update.state == mesos_pb2.TASK_RUNNING: 103 | def end_task(): 104 | time.sleep(self.sleep) 105 | driver.killTask(update.task_id) 106 | thread = threading.Thread(target=end_task) 107 | thread.daemon = True 108 | thread.start() 109 | if self.all_tasks_done(): 110 | self.sum_up() 111 | driver.stop() 112 | def resourceOffers(self, driver, offers): 113 | for offer in offers: 114 | if len(self.tasks) >= self.trials: break 115 | tid = self.next_task_id() 116 | sid = offer.slave_id 117 | task = task_with_daemon(tid, sid, self.container) 118 | self.tasks += [task] 119 | self.loggers[tid].info(present_task(task)) 120 | driver.launchTasks(offer.id, [task]) 121 | 122 | class ExecutorScheduler(Scheduler): 123 | sh = "python deimos-test.py --executor" 124 | this = "file://" + os.path.abspath(__file__) 125 | libmesos = "docker:///libmesos/ubuntu" 126 | shutdown_message = "shutdown" 127 | def __init__(self, command=sh, uris=[this], container=libmesos, trials=10): 128 | Scheduler.__init__(self, trials) 129 | self.command = command 130 | self.uris = uris 131 | self.container = container 132 | self.messages = [] 133 | self.executor = "deimos-test.%s.executor" % self.token 134 | def statusUpdate(self, driver, update): 135 | super(type(self), self).statusUpdate(driver, update) 136 | if self.all_tasks_done(): 137 | sid = update.slave_id 138 | eid = mesos_pb2.ExecutorID() 139 | eid.value = self.executor 140 | driver.sendFrameworkMessage(eid, sid, type(self).shutdown_message) 141 | self.sum_up() 142 | driver.stop() 143 | def frameworkMessage(self, driver, eid, sid, msg): 144 | self.messages += [msg] 145 | driver.killTask(update.task_id) 146 | def resourceOffers(self, driver, offers): 147 | for offer in offers: 148 | if len(self.tasks) >= self.trials: break 149 | tid = self.next_task_id() 150 | task = task_with_executor(tid, 
offer.slave_id, self.executor, 151 | self.command, self.uris, self.container) 152 | self.tasks += [task] 153 | self.loggers[tid].info(present_task(task)) 154 | driver.launchTasks(offer.id, [task]) 155 | 156 | class ExecutorSchedulerExecutor(mesos.Executor): 157 | def launchTask(self, driver, task): 158 | def run(): 159 | log.info("Running task %s" % task.task_id.value) 160 | update = mesos_pb2.TaskStatus() 161 | update.task_id.value = task.task_id.value 162 | update.state = mesos_pb2.TASK_RUNNING 163 | driver.sendStatusUpdate(update) 164 | log.info("Sent: TASK_RUNNING") 165 | update = mesos_pb2.TaskStatus() 166 | update.task_id.value = task.task_id.value 167 | update.state = mesos_pb2.TASK_FINISHED 168 | update.data = "ping" 169 | driver.sendStatusUpdate(update) 170 | log.info("Sent: TASK_FINISHED") 171 | thread = threading.Thread(target=run) 172 | thread.daemon = True 173 | thread.start() 174 | def frameworkMessage(self, driver, message): 175 | if message == ExecutorScheduler.shutdown_message: 176 | log.warning("Received shutdown message: %s", message) 177 | driver.stop() 178 | else: 179 | log.warning("Unexpected message: %s", message) 180 | 181 | 182 | ################################################################ Task factories 183 | 184 | def task_with_executor(tid, sid, eid, *args): 185 | executor = mesos_pb2.ExecutorInfo() 186 | executor.executor_id.value = eid 187 | executor.name = eid 188 | executor.command.MergeFrom(command(*args)) 189 | task = task_base(tid, sid) 190 | task.executor.MergeFrom(executor) 191 | return task 192 | 193 | def task_with_command(tid, sid, *args): 194 | task = task_base(tid, sid) 195 | task.command.MergeFrom(command(*args)) 196 | return task 197 | 198 | def task_with_daemon(tid, sid, image): 199 | task = task_base(tid, sid) 200 | task.command.MergeFrom(command(image=image)) 201 | return task 202 | 203 | def task_base(tid, sid, cpu=0.5, ram=256): 204 | task = mesos_pb2.TaskInfo() 205 | task.task_id.value = tid 206 | task.slave_id.value = sid.value 207 | task.name = tid 208 | cpus = task.resources.add() 209 | cpus.name = "cpus" 210 | cpus.type = mesos_pb2.Value.SCALAR 211 | cpus.scalar.value = cpu 212 | mem = task.resources.add() 213 | mem.name = "mem" 214 | mem.type = mesos_pb2.Value.SCALAR 215 | mem.scalar.value = ram 216 | return task 217 | 218 | def command(shell="", uris=[], image=None): 219 | command = mesos_pb2.CommandInfo() 220 | command.value = shell 221 | for uri in uris: 222 | command.uris.add().value = uri 223 | if image: # Rely on the default image when none is set 224 | container = mesos_pb2.CommandInfo.ContainerInfo() 225 | container.image = image 226 | command.container.MergeFrom(container) 227 | return command 228 | 229 | def present_task(task): 230 | if task.HasField("executor"): 231 | token, body = "executor", task.executor 232 | else: 233 | token, body = "command", task.command 234 | lines = pb.text_format.MessageToString(body).strip().split("\n") 235 | return "\n %s {\n %s\n }" % (token, "\n ".join(lines)) 236 | 237 | def present_status(update): 238 | info = mesos_pb2.TaskState.Name(update.state) 239 | if update.state in Scheduler.failed and update.HasField("message"): 240 | info += '\n message: "%s"' % update.message 241 | return info 242 | 243 | 244 | ########################################################################## Main 245 | 246 | def cli(): 247 | schedulers = { "sleep" : SleepScheduler, 248 | "pg" : PGScheduler, 249 | "executor" : ExecutorScheduler } 250 | p = argparse.ArgumentParser(prog="deimos-test.py") 251 | 
p.add_argument("--master", default="localhost:5050", 252 | help="Mesos master URL") 253 | p.add_argument("--test", choices=schedulers.keys(), default="sleep", 254 | help="Test scheduler to use") 255 | p.add_argument("--executor", action="store_true", default=False, 256 | help="Runs the executor instead of a test scheduler") 257 | p.add_argument("--test.container", 258 | help="Image URL to use (for any test)") 259 | p.add_argument("--test.uris", action="append", 260 | help="Pass any number of times to add URIs (for any test)") 261 | p.add_argument("--test.trials", type=int, 262 | help="Number of tasks to run (for any test)") 263 | p.add_argument("--test.sleep", type=int, 264 | help="Seconds to sleep (for sleep test)") 265 | p.add_argument("--test.command", 266 | help="Command to use (for executor test)") 267 | parsed = p.parse_args() 268 | 269 | if parsed.executor: 270 | log.info("Mesos executor mode was chosen") 271 | driver = mesos.MesosExecutorDriver(ExecutorSchedulerExecutor()) 272 | code = driver.run() 273 | log.info(mesos_pb2.Status.Name(code)) 274 | driver.stop() 275 | if code != mesos_pb2.DRIVER_STOPPED: 276 | log.error("Driver died in an anomalous state") 277 | os._exit(2) 278 | os._exit(0) 279 | 280 | pairs = [ (k.split("test.")[1:], v) for k, v in vars(parsed).items() ] 281 | constructor_args = dict( (k[0], v) for k, v in pairs if len(k) == 1 and v ) 282 | scheduler_class = schedulers[parsed.test] 283 | scheduler = scheduler_class(**constructor_args) 284 | args = ", ".join( "%s=%r" % (k, v) for k, v in constructor_args.items() ) 285 | log.info("Testing: %s(%s)" % (scheduler_class.__name__, args)) 286 | 287 | framework = mesos_pb2.FrameworkInfo() 288 | framework.name = "deimos-test" 289 | framework.user = "" 290 | driver = mesos.MesosSchedulerDriver(scheduler, framework, parsed.master) 291 | code = driver.run() 292 | log.info(mesos_pb2.Status.Name(code)) 293 | driver.stop() 294 | ################ 2 => driver problem 1 => tests failed 0 => tests passed 295 | if code != mesos_pb2.DRIVER_STOPPED: 296 | log.error("Driver died in an anomalous state") 297 | log.info("Aborted: %s(%s)" % (scheduler_class.__name__, args)) 298 | os._exit(2) 299 | if any(_ in Scheduler.failed for _ in scheduler.statuses.values()): 300 | log.error("Test run failed -- not all tasks made it") 301 | log.info("Failure: %s(%s)" % (scheduler_class.__name__, args)) 302 | os._exit(1) 303 | log.info("Success: %s(%s)" % (scheduler_class.__name__, args)) 304 | os._exit(0) 305 | 306 | logging.basicConfig(format="%(asctime)s.%(msecs)03d %(name)s %(message)s", 307 | datefmt="%H:%M:%S", level=logging.DEBUG) 308 | log = logging.getLogger("deimos-test") 309 | 310 | if __name__ == "__main__": 311 | def handler(signum, _): 312 | log.warning("Signal: " + str(signum)) 313 | os._exit(-signum) 314 | signal.signal(signal.SIGINT, handler) 315 | cli() 316 | 317 | -------------------------------------------------------------------------------- /deimos/containerizer_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: containerizer.proto 3 | 4 | from google.protobuf import descriptor as _descriptor 5 | from google.protobuf import message as _message 6 | from google.protobuf import reflection as _reflection 7 | from google.protobuf import descriptor_pb2 8 | # @@protoc_insertion_point(imports) 9 | 10 | 11 | import mesos_pb2 12 | 13 | 14 | DESCRIPTOR = _descriptor.FileDescriptor( 15 | name='containerizer.proto', 16 | package='mesos.containerizer', 17 | serialized_pb='\n\x13\x63ontainerizer.proto\x12\x13mesos.containerizer\x1a\x0bmesos.proto\"\xec\x01\n\x06Launch\x12(\n\x0c\x63ontainer_id\x18\x01 \x02(\x0b\x32\x12.mesos.ContainerID\x12\"\n\ttask_info\x18\x02 \x01(\x0b\x32\x0f.mesos.TaskInfo\x12*\n\rexecutor_info\x18\x03 \x01(\x0b\x32\x13.mesos.ExecutorInfo\x12\x11\n\tdirectory\x18\x04 \x01(\t\x12\x0c\n\x04user\x18\x05 \x01(\t\x12 \n\x08slave_id\x18\x06 \x01(\x0b\x32\x0e.mesos.SlaveID\x12\x11\n\tslave_pid\x18\x07 \x01(\t\x12\x12\n\ncheckpoint\x18\x08 \x01(\x08\"V\n\x06Update\x12(\n\x0c\x63ontainer_id\x18\x01 \x02(\x0b\x32\x12.mesos.ContainerID\x12\"\n\tresources\x18\x02 \x03(\x0b\x32\x0f.mesos.Resource\"0\n\x04Wait\x12(\n\x0c\x63ontainer_id\x18\x01 \x02(\x0b\x32\x12.mesos.ContainerID\"3\n\x07\x44\x65stroy\x12(\n\x0c\x63ontainer_id\x18\x01 \x02(\x0b\x32\x12.mesos.ContainerID\"1\n\x05Usage\x12(\n\x0c\x63ontainer_id\x18\x01 \x02(\x0b\x32\x12.mesos.ContainerID\">\n\x0bTermination\x12\x0e\n\x06killed\x18\x01 \x02(\x08\x12\x0f\n\x07message\x18\x02 \x02(\t\x12\x0e\n\x06status\x18\x03 \x01(\x05\"4\n\nContainers\x12&\n\ncontainers\x18\x01 \x03(\x0b\x32\x12.mesos.ContainerIDB(\n\x1eorg.apache.mesos.containerizerB\x06Protos') 18 | 19 | 20 | 21 | 22 | _LAUNCH = _descriptor.Descriptor( 23 | name='Launch', 24 | full_name='mesos.containerizer.Launch', 25 | filename=None, 26 | file=DESCRIPTOR, 27 | containing_type=None, 28 | fields=[ 29 | _descriptor.FieldDescriptor( 30 | name='container_id', full_name='mesos.containerizer.Launch.container_id', index=0, 31 | number=1, type=11, cpp_type=10, label=2, 32 | has_default_value=False, default_value=None, 33 | message_type=None, enum_type=None, containing_type=None, 34 | is_extension=False, extension_scope=None, 35 | options=None), 36 | _descriptor.FieldDescriptor( 37 | name='task_info', full_name='mesos.containerizer.Launch.task_info', index=1, 38 | number=2, type=11, cpp_type=10, label=1, 39 | has_default_value=False, default_value=None, 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | _descriptor.FieldDescriptor( 44 | name='executor_info', full_name='mesos.containerizer.Launch.executor_info', index=2, 45 | number=3, type=11, cpp_type=10, label=1, 46 | has_default_value=False, default_value=None, 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | options=None), 50 | _descriptor.FieldDescriptor( 51 | name='directory', full_name='mesos.containerizer.Launch.directory', index=3, 52 | number=4, type=9, cpp_type=9, label=1, 53 | has_default_value=False, default_value=unicode("", "utf-8"), 54 | message_type=None, enum_type=None, containing_type=None, 55 | is_extension=False, extension_scope=None, 56 | options=None), 57 | _descriptor.FieldDescriptor( 58 | name='user', full_name='mesos.containerizer.Launch.user', index=4, 59 | number=5, type=9, cpp_type=9, label=1, 60 | has_default_value=False, default_value=unicode("", "utf-8"), 61 | message_type=None, enum_type=None, containing_type=None, 62 | is_extension=False, 
extension_scope=None, 63 | options=None), 64 | _descriptor.FieldDescriptor( 65 | name='slave_id', full_name='mesos.containerizer.Launch.slave_id', index=5, 66 | number=6, type=11, cpp_type=10, label=1, 67 | has_default_value=False, default_value=None, 68 | message_type=None, enum_type=None, containing_type=None, 69 | is_extension=False, extension_scope=None, 70 | options=None), 71 | _descriptor.FieldDescriptor( 72 | name='slave_pid', full_name='mesos.containerizer.Launch.slave_pid', index=6, 73 | number=7, type=9, cpp_type=9, label=1, 74 | has_default_value=False, default_value=unicode("", "utf-8"), 75 | message_type=None, enum_type=None, containing_type=None, 76 | is_extension=False, extension_scope=None, 77 | options=None), 78 | _descriptor.FieldDescriptor( 79 | name='checkpoint', full_name='mesos.containerizer.Launch.checkpoint', index=7, 80 | number=8, type=8, cpp_type=7, label=1, 81 | has_default_value=False, default_value=False, 82 | message_type=None, enum_type=None, containing_type=None, 83 | is_extension=False, extension_scope=None, 84 | options=None), 85 | ], 86 | extensions=[ 87 | ], 88 | nested_types=[], 89 | enum_types=[ 90 | ], 91 | options=None, 92 | is_extendable=False, 93 | extension_ranges=[], 94 | serialized_start=58, 95 | serialized_end=294, 96 | ) 97 | 98 | 99 | _UPDATE = _descriptor.Descriptor( 100 | name='Update', 101 | full_name='mesos.containerizer.Update', 102 | filename=None, 103 | file=DESCRIPTOR, 104 | containing_type=None, 105 | fields=[ 106 | _descriptor.FieldDescriptor( 107 | name='container_id', full_name='mesos.containerizer.Update.container_id', index=0, 108 | number=1, type=11, cpp_type=10, label=2, 109 | has_default_value=False, default_value=None, 110 | message_type=None, enum_type=None, containing_type=None, 111 | is_extension=False, extension_scope=None, 112 | options=None), 113 | _descriptor.FieldDescriptor( 114 | name='resources', full_name='mesos.containerizer.Update.resources', index=1, 115 | number=2, type=11, cpp_type=10, label=3, 116 | has_default_value=False, default_value=[], 117 | message_type=None, enum_type=None, containing_type=None, 118 | is_extension=False, extension_scope=None, 119 | options=None), 120 | ], 121 | extensions=[ 122 | ], 123 | nested_types=[], 124 | enum_types=[ 125 | ], 126 | options=None, 127 | is_extendable=False, 128 | extension_ranges=[], 129 | serialized_start=296, 130 | serialized_end=382, 131 | ) 132 | 133 | 134 | _WAIT = _descriptor.Descriptor( 135 | name='Wait', 136 | full_name='mesos.containerizer.Wait', 137 | filename=None, 138 | file=DESCRIPTOR, 139 | containing_type=None, 140 | fields=[ 141 | _descriptor.FieldDescriptor( 142 | name='container_id', full_name='mesos.containerizer.Wait.container_id', index=0, 143 | number=1, type=11, cpp_type=10, label=2, 144 | has_default_value=False, default_value=None, 145 | message_type=None, enum_type=None, containing_type=None, 146 | is_extension=False, extension_scope=None, 147 | options=None), 148 | ], 149 | extensions=[ 150 | ], 151 | nested_types=[], 152 | enum_types=[ 153 | ], 154 | options=None, 155 | is_extendable=False, 156 | extension_ranges=[], 157 | serialized_start=384, 158 | serialized_end=432, 159 | ) 160 | 161 | 162 | _DESTROY = _descriptor.Descriptor( 163 | name='Destroy', 164 | full_name='mesos.containerizer.Destroy', 165 | filename=None, 166 | file=DESCRIPTOR, 167 | containing_type=None, 168 | fields=[ 169 | _descriptor.FieldDescriptor( 170 | name='container_id', full_name='mesos.containerizer.Destroy.container_id', index=0, 171 | number=1, type=11, 
cpp_type=10, label=2, 172 | has_default_value=False, default_value=None, 173 | message_type=None, enum_type=None, containing_type=None, 174 | is_extension=False, extension_scope=None, 175 | options=None), 176 | ], 177 | extensions=[ 178 | ], 179 | nested_types=[], 180 | enum_types=[ 181 | ], 182 | options=None, 183 | is_extendable=False, 184 | extension_ranges=[], 185 | serialized_start=434, 186 | serialized_end=485, 187 | ) 188 | 189 | 190 | _USAGE = _descriptor.Descriptor( 191 | name='Usage', 192 | full_name='mesos.containerizer.Usage', 193 | filename=None, 194 | file=DESCRIPTOR, 195 | containing_type=None, 196 | fields=[ 197 | _descriptor.FieldDescriptor( 198 | name='container_id', full_name='mesos.containerizer.Usage.container_id', index=0, 199 | number=1, type=11, cpp_type=10, label=2, 200 | has_default_value=False, default_value=None, 201 | message_type=None, enum_type=None, containing_type=None, 202 | is_extension=False, extension_scope=None, 203 | options=None), 204 | ], 205 | extensions=[ 206 | ], 207 | nested_types=[], 208 | enum_types=[ 209 | ], 210 | options=None, 211 | is_extendable=False, 212 | extension_ranges=[], 213 | serialized_start=487, 214 | serialized_end=536, 215 | ) 216 | 217 | 218 | _TERMINATION = _descriptor.Descriptor( 219 | name='Termination', 220 | full_name='mesos.containerizer.Termination', 221 | filename=None, 222 | file=DESCRIPTOR, 223 | containing_type=None, 224 | fields=[ 225 | _descriptor.FieldDescriptor( 226 | name='killed', full_name='mesos.containerizer.Termination.killed', index=0, 227 | number=1, type=8, cpp_type=7, label=2, 228 | has_default_value=False, default_value=False, 229 | message_type=None, enum_type=None, containing_type=None, 230 | is_extension=False, extension_scope=None, 231 | options=None), 232 | _descriptor.FieldDescriptor( 233 | name='message', full_name='mesos.containerizer.Termination.message', index=1, 234 | number=2, type=9, cpp_type=9, label=2, 235 | has_default_value=False, default_value=unicode("", "utf-8"), 236 | message_type=None, enum_type=None, containing_type=None, 237 | is_extension=False, extension_scope=None, 238 | options=None), 239 | _descriptor.FieldDescriptor( 240 | name='status', full_name='mesos.containerizer.Termination.status', index=2, 241 | number=3, type=5, cpp_type=1, label=1, 242 | has_default_value=False, default_value=0, 243 | message_type=None, enum_type=None, containing_type=None, 244 | is_extension=False, extension_scope=None, 245 | options=None), 246 | ], 247 | extensions=[ 248 | ], 249 | nested_types=[], 250 | enum_types=[ 251 | ], 252 | options=None, 253 | is_extendable=False, 254 | extension_ranges=[], 255 | serialized_start=538, 256 | serialized_end=600, 257 | ) 258 | 259 | 260 | _CONTAINERS = _descriptor.Descriptor( 261 | name='Containers', 262 | full_name='mesos.containerizer.Containers', 263 | filename=None, 264 | file=DESCRIPTOR, 265 | containing_type=None, 266 | fields=[ 267 | _descriptor.FieldDescriptor( 268 | name='containers', full_name='mesos.containerizer.Containers.containers', index=0, 269 | number=1, type=11, cpp_type=10, label=3, 270 | has_default_value=False, default_value=[], 271 | message_type=None, enum_type=None, containing_type=None, 272 | is_extension=False, extension_scope=None, 273 | options=None), 274 | ], 275 | extensions=[ 276 | ], 277 | nested_types=[], 278 | enum_types=[ 279 | ], 280 | options=None, 281 | is_extendable=False, 282 | extension_ranges=[], 283 | serialized_start=602, 284 | serialized_end=654, 285 | ) 286 | 287 | 
_LAUNCH.fields_by_name['container_id'].message_type = mesos_pb2._CONTAINERID 288 | _LAUNCH.fields_by_name['task_info'].message_type = mesos_pb2._TASKINFO 289 | _LAUNCH.fields_by_name['executor_info'].message_type = mesos_pb2._EXECUTORINFO 290 | _LAUNCH.fields_by_name['slave_id'].message_type = mesos_pb2._SLAVEID 291 | _UPDATE.fields_by_name['container_id'].message_type = mesos_pb2._CONTAINERID 292 | _UPDATE.fields_by_name['resources'].message_type = mesos_pb2._RESOURCE 293 | _WAIT.fields_by_name['container_id'].message_type = mesos_pb2._CONTAINERID 294 | _DESTROY.fields_by_name['container_id'].message_type = mesos_pb2._CONTAINERID 295 | _USAGE.fields_by_name['container_id'].message_type = mesos_pb2._CONTAINERID 296 | _CONTAINERS.fields_by_name['containers'].message_type = mesos_pb2._CONTAINERID 297 | DESCRIPTOR.message_types_by_name['Launch'] = _LAUNCH 298 | DESCRIPTOR.message_types_by_name['Update'] = _UPDATE 299 | DESCRIPTOR.message_types_by_name['Wait'] = _WAIT 300 | DESCRIPTOR.message_types_by_name['Destroy'] = _DESTROY 301 | DESCRIPTOR.message_types_by_name['Usage'] = _USAGE 302 | DESCRIPTOR.message_types_by_name['Termination'] = _TERMINATION 303 | DESCRIPTOR.message_types_by_name['Containers'] = _CONTAINERS 304 | 305 | class Launch(_message.Message): 306 | __metaclass__ = _reflection.GeneratedProtocolMessageType 307 | DESCRIPTOR = _LAUNCH 308 | 309 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Launch) 310 | 311 | class Update(_message.Message): 312 | __metaclass__ = _reflection.GeneratedProtocolMessageType 313 | DESCRIPTOR = _UPDATE 314 | 315 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Update) 316 | 317 | class Wait(_message.Message): 318 | __metaclass__ = _reflection.GeneratedProtocolMessageType 319 | DESCRIPTOR = _WAIT 320 | 321 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Wait) 322 | 323 | class Destroy(_message.Message): 324 | __metaclass__ = _reflection.GeneratedProtocolMessageType 325 | DESCRIPTOR = _DESTROY 326 | 327 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Destroy) 328 | 329 | class Usage(_message.Message): 330 | __metaclass__ = _reflection.GeneratedProtocolMessageType 331 | DESCRIPTOR = _USAGE 332 | 333 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Usage) 334 | 335 | class Termination(_message.Message): 336 | __metaclass__ = _reflection.GeneratedProtocolMessageType 337 | DESCRIPTOR = _TERMINATION 338 | 339 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Termination) 340 | 341 | class Containers(_message.Message): 342 | __metaclass__ = _reflection.GeneratedProtocolMessageType 343 | DESCRIPTOR = _CONTAINERS 344 | 345 | # @@protoc_insertion_point(class_scope:mesos.containerizer.Containers) 346 | 347 | 348 | DESCRIPTOR.has_options = True 349 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), '\n\036org.apache.mesos.containerizerB\006Protos') 350 | # @@protoc_insertion_point(module_scope) 351 | -------------------------------------------------------------------------------- /deimos/containerizer/docker.py: -------------------------------------------------------------------------------- 1 | import errno 2 | from fcntl import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN 3 | from itertools import takewhile, dropwhile 4 | import logging 5 | import os 6 | import random 7 | import re 8 | import signal 9 | import subprocess 10 | import sys 11 | import threading 12 | import time 13 | 14 | try: # Prefer system installation of Mesos protos if available 15 | from mesos_pb2 
import * 16 | from containerizer_pb2 import * 17 | except: 18 | from deimos.mesos_pb2 import * 19 | from deimos.containerizer_pb2 import * 20 | 21 | import deimos.cgroups 22 | from deimos.cmd import Run 23 | import deimos.config 24 | from deimos.containerizer import * 25 | import deimos.docker 26 | from deimos.err import Err 27 | import deimos.logger 28 | from deimos.logger import log 29 | import deimos.mesos 30 | import deimos.path 31 | from deimos.proto import recordio 32 | from deimos._struct import _Struct 33 | import deimos.state 34 | import deimos.sig 35 | 36 | 37 | class Docker(Containerizer, _Struct): 38 | 39 | def __init__(self, workdir="/tmp/mesos-sandbox", 40 | state_root="/tmp/deimos", 41 | shared_dir="fs", 42 | optimistic_unpack=True, 43 | hooks=deimos.config.Hooks(), 44 | container_settings=deimos.config.Containers(), 45 | index_settings=deimos.config.DockerIndex()): 46 | _Struct.__init__(self, workdir=workdir, 47 | state_root=state_root, 48 | shared_dir=shared_dir, 49 | optimistic_unpack=optimistic_unpack, 50 | hooks=hooks, 51 | container_settings=container_settings, 52 | index_settings=index_settings, 53 | runner=None, 54 | state=None) 55 | 56 | def launch(self, launch_pb, *args): 57 | log.info(" ".join(args)) 58 | fork = False if "--no-fork" in args else True 59 | deimos.sig.install(self.log_signal) 60 | run_options = [] 61 | launchy = deimos.mesos.Launch(launch_pb) 62 | state = deimos.state.State(self.state_root, 63 | mesos_id=launchy.container_id) 64 | state.push() 65 | lk_l = state.lock("launch", LOCK_EX) 66 | state.executor_id = launchy.executor_id 67 | state.push() 68 | state.ids() 69 | mesos_directory() # Redundant? 70 | if launchy.directory: 71 | os.chdir(launchy.directory) 72 | # TODO: if launchy.user: 73 | # os.seteuid(launchy.user) 74 | url, options = launchy.container 75 | options, trailing_argv = split_on(options, "//") 76 | url, options = self.container_settings.override(url, options) 77 | 78 | true_argv = launchy.argv if trailing_argv is None else trailing_argv 79 | 80 | image = self.determine_image(url, launchy) 81 | log.info("image = %s", image) 82 | run_options += ["--sig-proxy"] 83 | run_options += ["--rm"] # This is how we ensure container cleanup 84 | run_options += ["--cidfile", state.resolve("cid")] 85 | 86 | place_uris(launchy, self.shared_dir, self.optimistic_unpack) 87 | run_options += ["-w", self.workdir] 88 | 89 | # Docker requires an absolute path to a source filesystem, separated 90 | # from the bind path in the container with a colon, but the absolute 91 | # path to the Mesos sandbox might have colons in it (TaskIDs with 92 | # timestamps can cause this situation). So we create a soft link to it 93 | # and mount that. 94 | shared_full = os.path.abspath(self.shared_dir) 95 | sandbox_symlink = state.sandbox_symlink(shared_full) 96 | run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)] 97 | 98 | cpus, mems = launchy.cpu_and_mem 99 | env = launchy.env 100 | run_options += options 101 | 102 | # We need to wrap the call to Docker in a call to the Mesos executor 103 | # if no executor is passed as part of the task. We need to pass the 104 | # MESOS_* environment variables in to the container if we're going to 105 | # start an executor. 106 | observer_argv = None 107 | if launchy.needs_observer: 108 | # NB: The "@@docker@@" variant is a work around for Mesos's option 109 | # parser. There is a fix in the pipeline. 
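# (Added, illustrative comment; exact paths depend on the local install.)
# The observer command assembled just below re-runs Deimos in "observe" mode
# under the stock Mesos executor, roughly:
#
#     mesos-executor --override <deimos> observe <mesos-container-id>
#
# so the slave sees an ordinary executor while Deimos watches the container
# and reports its exit status.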
110 | observer_argv = [mesos_executor(), "--override", 111 | deimos.path.me(), "observe", state.mesos_id] 112 | state.lock("observe", LOCK_EX | LOCK_NB) # Explanation of Locks 113 | # When the observer is running, we would like its call to 114 | # observe() to finish before all the wait(); and we'd like the 115 | # observer to have a chance to report TASK_FINISHED before the 116 | # calls to wait() report their results (which would result in a 117 | # TASK_FAILED). 118 | # 119 | # For this reason, we take the "observe" lock in launch(), before 120 | # we call the observer and before releasing the "launch" or "wait" 121 | # locks. 122 | # 123 | # Calls to observe() actually skip locking "observe"; but wait() 124 | # calls must take this lock. The "observe" lock is held by 125 | # launch() until the observer executor completes, at which point 126 | # we can be reasonably sure its status was propagated to the Mesos 127 | # slave. 128 | else: 129 | env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)] 130 | 131 | # Flatten our env list of tuples into dictionary object for Popen 132 | popen_env = dict(env) 133 | 134 | self.place_dockercfg() 135 | 136 | runner_argv = deimos.docker.run(run_options, image, true_argv, 137 | env=env, ports=launchy.ports, 138 | cpus=cpus, mems=mems) 139 | 140 | log_mesos_env(logging.DEBUG) 141 | 142 | observer = None 143 | with open("stdout", "w") as o: # This awkward multi 'with' is a 144 | with open("stderr", "w") as e: # concession to 2.6 compatibility 145 | with open(os.devnull) as devnull: 146 | log.info(deimos.cmd.present(runner_argv)) 147 | 148 | onlaunch = self.hooks.onlaunch 149 | # test for default configuration (empty list) 150 | if onlaunch: 151 | # We're going to catch all exceptions because it's not 152 | # in scope for Deimos to stack trace on a hook error 153 | try: 154 | subprocess.Popen(onlaunch, stdin=devnull, 155 | stdout=devnull, 156 | stderr=devnull, 157 | env=popen_env) 158 | except Exception as exc: # NB: not 'e' -- that's the stderr handle opened above 159 | log.warning("onlaunch hook failed with %s" % exc) 160 | 161 | self.runner = subprocess.Popen(runner_argv, stdin=devnull, 162 | stdout=o, 163 | stderr=e) 164 | state.pid(self.runner.pid) 165 | state.await_cid() 166 | state.push() 167 | lk_w = state.lock("wait", LOCK_EX) 168 | lk_l.unlock() 169 | if fork: 170 | pid = os.fork() 171 | if pid != 0: 172 | state.ids() 173 | log.info("Forking watcher into child...") 174 | return 175 | state.ids() 176 | if observer_argv is not None: 177 | log.info(deimos.cmd.present(observer_argv)) 178 | call = deimos.cmd.in_sh(observer_argv, allstderr=False) 179 | # TODO: Collect these leaking file handles. 180 | obs_out = open(state.resolve("observer.out"), "w+") 181 | obs_err = open(state.resolve("observer.err"), "w+") 182 | # If the Mesos executor sees LIBPROCESS_PORT=0 (which 183 | # is passed by the slave) there are problems when it 184 | # attempts to bind. ("Address already in use"). 185 | # Purging both LIBPROCESS_* net variables, to be safe.
186 | for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]: 187 | if v in os.environ: 188 | del os.environ[v] 189 | observer = subprocess.Popen(call, stdin=devnull, 190 | stdout=obs_out, 191 | stderr=obs_err, 192 | close_fds=True) 193 | data = Run(data=True)(deimos.docker.wait(state.cid())) 194 | state.exit(data) 195 | lk_w.unlock() 196 | for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]: 197 | if p is None: 198 | continue 199 | thread = threading.Thread(target=p.wait) 200 | thread.start() 201 | thread.join(10) 202 | if thread.is_alive(): 203 | log.warning(deimos.cmd.present(arr, "SIGTERM after 10s")) 204 | p.terminate() 205 | thread.join(1) 206 | if thread.is_alive(): 207 | log.warning(deimos.cmd.present(arr, "SIGKILL after 1s")) 208 | p.kill() 209 | msg = deimos.cmd.present(arr, p.wait()) 210 | if p.wait() == 0: 211 | log.info(msg) 212 | else: 213 | log.warning(msg) 214 | 215 | with open(os.devnull) as devnull: 216 | ondestroy = self.hooks.ondestroy 217 | if ondestroy: 218 | # Deimos shouldn't care if the hook fails. 219 | # The hook should implement its own error handling. 220 | try: 221 | subprocess.Popen(ondestroy, stdin=devnull, 222 | stdout=devnull, 223 | stderr=devnull, 224 | env=popen_env) 225 | except Exception as e: 226 | log.warning("ondestroy hook failed with %s" % e) 227 | 228 | return state.exit() 229 | 230 | def update(self, update_pb, *args): 231 | log.info(" ".join(args)) 232 | log.info("Update is a no-op for Docker...") 233 | 234 | def usage(self, usage_pb, *args): 235 | log.info(" ".join(args)) 236 | container_id = usage_pb.container_id.value 237 | state = deimos.state.State(self.state_root, mesos_id=container_id) 238 | state.await_launch() 239 | state.ids() 240 | if state.cid() is None: 241 | log.info("Container not started?") 242 | return 0 243 | if state.exit() is not None: 244 | log.info("Container is stopped") 245 | return 0 246 | cg = deimos.cgroups.CGroups(**deimos.docker.cgroups(state.cid())) 247 | if len(cg.keys()) == 0: 248 | log.info("Container has no CGroups...already stopped?") 249 | return 0 250 | try: 251 | recordio.write(ResourceStatistics, 252 | timestamp=time.time(), 253 | mem_limit_bytes=cg.memory.limit(), 254 | cpus_limit=cg.cpu.limit(), 255 | # cpus_user_time_secs = cg.cpuacct.user_time(), 256 | # cpus_system_time_secs = cg.cpuacct.system_time(), 257 | mem_rss_bytes=cg.memory.rss()) 258 | except AttributeError as e: 259 | log.error("Missing CGroup!") 260 | raise e 261 | return 0 262 | 263 | def observe(self, *args): 264 | log.info(" ".join(args)) 265 | state = deimos.state.State(self.state_root, mesos_id=args[0]) 266 | self.state = state 267 | deimos.sig.install(self.stop_docker_and_resume) 268 | state.await_launch() 269 | try: # Take the wait lock to block calls to wait() 270 | state.lock("wait", LOCK_SH, seconds=None) 271 | except IOError as e: # Allows for signal recovery 272 | if e.errno != errno.EINTR: 273 | raise e 274 | state.lock("wait", LOCK_SH, seconds=1) 275 | if state.exit() is not None: 276 | return state.exit() 277 | raise Err("Wait lock is not held nor is exit file present") 278 | 279 | def wait(self, wait_pb, *args): 280 | log.info(" ".join(args)) 281 | container_id = wait_pb.container_id.value 282 | state = deimos.state.State(self.state_root, mesos_id=container_id) 283 | self.state = state 284 | deimos.sig.install(self.stop_docker_and_resume) 285 | state.await_launch() 286 | try: # Wait for the observe lock so observe completes first 287 | state.lock("observe", LOCK_SH, seconds=None) 288 | state.lock("wait", LOCK_SH, 
seconds=None) 289 | except IOError as e: # Allows for signal recovery 290 | if e.errno != errno.EINTR: 291 | raise e 292 | state.lock("observe", LOCK_SH, seconds=1) 293 | state.lock("wait", LOCK_SH, seconds=1) 294 | termination = (state.exit() if state.exit() is not None else 64) << 8 295 | recordio.write(Termination, 296 | killed=False, 297 | message="", 298 | status=termination) 299 | if state.exit() is not None: 300 | return state.exit() 301 | raise Err("Wait lock is not held nor is exit file present") 302 | 303 | def destroy(self, destroy_pb, *args): 304 | log.info(" ".join(args)) 305 | container_id = destroy_pb.container_id.value 306 | state = deimos.state.State(self.state_root, mesos_id=container_id) 307 | state.await_launch() 308 | lk_d = state.lock("destroy", LOCK_EX) 309 | if state.exit() is None: 310 | Run()(deimos.docker.stop(state.cid())) 311 | else: 312 | log.info("Container is stopped") 313 | return 0 314 | 315 | def containers(self, *args): 316 | log.info(" ".join(args)) 317 | data = Run(data=True)(deimos.docker.docker("ps", "--no-trunc", "-q")) 318 | mesos_ids = [] 319 | for line in data.splitlines(): 320 | cid = line.strip() 321 | state = deimos.state.State(self.state_root, docker_id=cid) 322 | if not state.exists(): 323 | continue 324 | try: 325 | state.lock("wait", LOCK_SH | LOCK_NB) 326 | except deimos.flock.Err: # LOCK_EX held, so launch() is running 327 | mesos_ids += [state.mesos_container_id()] 328 | containers = Containers() 329 | for mesos_id in mesos_ids: 330 | container = containers.containers.add() 331 | container.value = mesos_id 332 | recordio.writeProto(containers) 333 | return 0 334 | 335 | def log_signal(self, signum): 336 | pass 337 | 338 | def stop_docker_and_resume(self, signum): 339 | if self.state is not None and self.state.cid() is not None: 340 | cid = self.state.cid() 341 | log.info("Trying to stop Docker container: %s", cid) 342 | try: 343 | Run()(deimos.docker.stop(cid)) 344 | except subprocess.CalledProcessError: 345 | pass 346 | return deimos.sig.Resume() 347 | 348 | def determine_image(self, url, launchy): 349 | opts = dict(self.container_settings.image.items(onlyset=True)) 350 | if "default" in opts: 351 | default = url_to_image(opts["default"]) 352 | else: 353 | default = self.image_from_system_context(launchy) 354 | image = url_to_image(url) 355 | return default if image == "" else image 356 | 357 | def image_from_system_context(self, launchy): 358 | opts = dict(self.index_settings.items(onlyset=True)) 359 | if "account_libmesos" in opts: 360 | if not launchy.needs_observer: 361 | opts["account"] = opts["account_libmesos"] 362 | del opts["account_libmesos"] 363 | if "dockercfg" in opts: 364 | del opts["dockercfg"] 365 | return deimos.docker.matching_image_for_host(**opts) 366 | 367 | def place_dockercfg(self): 368 | dockercfg = self.index_settings.dockercfg 369 | if dockercfg is not None: 370 | log.info("Copying to .dockercfg: %s" % dockercfg) 371 | Run()(["cp", dockercfg, ".dockercfg"]) 372 | 373 | 374 | def url_to_image(url): 375 | pre, image = re.split(r"^docker:///?", url) 376 | if pre != "": 377 | raise Err("URL '%s' is not a valid docker:// URL!" 
% url) 378 | return image 379 | 380 | 381 | def split_on(iterable, element): 382 | preceding = list(takewhile(lambda _: _ != element, iterable)) 383 | following = list(dropwhile(lambda _: _ != element, iterable)) 384 | return preceding, (following[1:] if len(following) > 0 else None) 385 | -------------------------------------------------------------------------------- /proto/mesos.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package mesos; 20 | 21 | option java_package = "org.apache.mesos"; 22 | option java_outer_classname = "Protos"; 23 | 24 | 25 | /** 26 | * Status is used to indicate the state of the scheduler and executor 27 | * driver after function calls. 28 | */ 29 | enum Status { 30 | DRIVER_NOT_STARTED = 1; 31 | DRIVER_RUNNING = 2; 32 | DRIVER_ABORTED = 3; 33 | DRIVER_STOPPED = 4; 34 | } 35 | 36 | 37 | /** 38 | * A unique ID assigned to a framework. A framework can reuse this ID 39 | * in order to do failover (see MesosSchedulerDriver). 40 | */ 41 | message FrameworkID { 42 | required string value = 1; 43 | } 44 | 45 | 46 | /** 47 | * A unique ID assigned to an offer. 48 | */ 49 | message OfferID { 50 | required string value = 1; 51 | } 52 | 53 | 54 | /** 55 | * A unique ID assigned to a slave. Currently, a slave gets a new ID 56 | * whenever it (re)registers with Mesos. Framework writers shouldn't 57 | * assume any binding between a slave ID and and a hostname. 58 | */ 59 | message SlaveID { 60 | required string value = 1; 61 | } 62 | 63 | 64 | /** 65 | * A framework generated ID to distinguish a task. The ID must remain 66 | * unique while the task is active. However, a framework can reuse an 67 | * ID _only_ if a previous task with the same ID has reached a 68 | * terminal state (e.g., TASK_FINISHED, TASK_LOST, TASK_KILLED, etc.). 69 | */ 70 | message TaskID { 71 | required string value = 1; 72 | } 73 | 74 | 75 | /** 76 | * A framework generated ID to distinguish an executor. Only one 77 | * executor with the same ID can be active on the same slave at a 78 | * time. 79 | */ 80 | message ExecutorID { 81 | required string value = 1; 82 | } 83 | 84 | 85 | /** 86 | * A slave generated ID to distinguish a container. The ID must be unique 87 | * between any active or completed containers on the slave. In particular, 88 | * containers for different runs of the same (framework, executor) pair must be 89 | * unique. 90 | */ 91 | message ContainerID { 92 | required string value = 1; 93 | } 94 | 95 | 96 | /** 97 | * Describes a framework. The user field is used to determine the 98 | * Unix user that an executor/task should be launched as. 
If the user 99 | * field is set to an empty string Mesos will automagically set it 100 | * to the current user. Note that the ID is only available after a 101 | * framework has registered, however, it is included here in order to 102 | * facilitate scheduler failover (i.e., if it is set then the 103 | * MesosSchedulerDriver expects the scheduler is performing failover). 104 | * The amount of time that the master will wait for the scheduler to 105 | * failover before removing the framework is specified by 106 | * failover_timeout. If checkpoint is set, framework pid, executor 107 | * pids and status updates are checkpointed to disk by the slaves. 108 | * Checkpointing allows a restarted slave to reconnect with old 109 | * executors and recover status updates, at the cost of disk I/O. 110 | * The role field is used to group frameworks for allocation 111 | * decisions, depending on the allocation policy being used. 112 | * If the hostname field is set to an empty string Mesos will 113 | * automagically set it to the current hostname. 114 | * The principal field should match the credential the framework uses 115 | * in authentication. This field is used for framework API rate 116 | * exporting and limiting and should be set even if authentication is 117 | * not enabled if these features are desired. 118 | */ 119 | message FrameworkInfo { 120 | required string user = 1; 121 | required string name = 2; 122 | optional FrameworkID id = 3; 123 | optional double failover_timeout = 4 [default = 0.0]; 124 | optional bool checkpoint = 5 [default = false]; 125 | optional string role = 6 [default = "*"]; 126 | optional string hostname = 7; 127 | optional string principal = 8; 128 | } 129 | 130 | 131 | /** 132 | * Describes a health check for a task or executor (or any arbitrary 133 | * process/command). A "strategy" is picked by specifying one of the 134 | * optional fields, currently only 'http' is supported. Specifying 135 | * more than one strategy is an error. 136 | */ 137 | message HealthCheck { 138 | // Describes an HTTP health check. 139 | message HTTP { 140 | // Port to send the HTTP request. 141 | required uint32 port = 1; 142 | 143 | // HTTP request path. 144 | optional string path = 2 [default = "/"]; 145 | 146 | // TODO(benh): Implement: 147 | // Whether or not to use HTTPS. 148 | // optional bool ssl = 3 [default = false]; 149 | 150 | // Expected response statuses. Not specifying any statuses implies 151 | // that any returned status is acceptable. 152 | repeated uint32 statuses = 4; 153 | 154 | // TODO(benh): Include an 'optional bytes data' field for checking 155 | // for specific data in the response. 156 | } 157 | 158 | optional HTTP http = 1; 159 | 160 | // TODO(benh): Consider adding a URL health check strategy which 161 | // allows doing something similar to the HTTP strategy but 162 | // encapsulates all the details in a single string field. 163 | 164 | // TODO(benh): Other possible health check strategies could include 165 | // one for TCP/UDP or a "command". A "command" could be running a 166 | // (shell) command to check the healthiness. We'd need to determine 167 | // what arguments (or environment variables) we'd want to set so 168 | // that the command could do it's job (i.e., do we want to expose 169 | // the stdout/stderr and/or the pid to make checking for healthiness 170 | // easier). 171 | 172 | // Amount of time to wait until starting the health checks. 173 | optional double delay_seconds = 2 [default = 15.0]; 174 | 175 | // Interval between health checks. 
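// (Added, illustrative note -- just a restatement of the defaults in this
// message: after an initial 15s delay, the check runs every 10s, each run is
// given 20s to complete, and 3 consecutive failures mark the task unhealthy.)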
176 | optional double interval_seconds = 3 [default = 10.0]; 177 | 178 | // Amount of time to wait for the health check to complete. 179 | optional double timeout_seconds = 4 [default = 20.0]; 180 | 181 | // Number of consecutive failures until considered unhealthy. 182 | optional uint32 failures = 5 [default = 3]; 183 | } 184 | 185 | 186 | /** 187 | * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified 188 | * are fetched before executing the command. If the executable field for an 189 | * uri is set, executable file permission is set on the downloaded file. 190 | * Otherwise, if the downloaded file has a recognized archive extension 191 | * (currently [compressed] tar and zip) it is extracted into the executor's 192 | * working directory. This extraction can be disabled by setting `extract` to 193 | * false. In addition, any environment variables are set before executing 194 | * the command (so they can be used to "parameterize" your command). 195 | */ 196 | message CommandInfo { 197 | message URI { 198 | required string value = 1; 199 | optional bool executable = 2; 200 | optional bool extract = 3 [default = true]; 201 | } 202 | 203 | // Describes a container. 204 | // Not all containerizers currently implement ContainerInfo, so it 205 | // is possible that a launched task will fail due to supplying this 206 | // attribute. 207 | // NOTE: The containerizer API is currently in an early beta or 208 | // even alpha state. Some details, like the exact semantics of an 209 | // "image" or "options" are not yet hardened. 210 | // TODO(tillt): Describe the exact scheme and semantics of "image" 211 | // and "options". 212 | message ContainerInfo { 213 | // URI describing the container image name. 214 | required string image = 1; 215 | 216 | // Describes additional options passed to the containerizer. 217 | repeated string options = 2; 218 | } 219 | 220 | // NOTE: MesosContainerizer does currently not support this 221 | // attribute and tasks supplying a 'container' will fail. 222 | optional ContainerInfo container = 4; 223 | 224 | repeated URI uris = 1; 225 | 226 | optional Environment environment = 2; 227 | 228 | // Actual command (i.e., 'echo hello world'). 229 | required string value = 3; 230 | 231 | // Enables executor and tasks to run as a specific user. If the user 232 | // field is present both in FrameworkInfo and here, the CommandInfo 233 | // user value takes precedence. 234 | optional string user = 5; 235 | 236 | // A health check for the command (currently in *alpha* and initial 237 | // support will only be for TaskInfo's that have a CommandInfo). 238 | optional HealthCheck health_check = 6; 239 | } 240 | 241 | 242 | /** 243 | * Describes information about an executor. The 'data' field can be 244 | * used to pass arbitrary bytes to an executor. 245 | */ 246 | message ExecutorInfo { 247 | required ExecutorID executor_id = 1; 248 | optional FrameworkID framework_id = 8; // TODO(benh): Make this required. 249 | required CommandInfo command = 7; 250 | repeated Resource resources = 5; 251 | optional string name = 9; 252 | 253 | // Source is an identifier style string used by frameworks to track 254 | // the source of an executor. This is useful when it's possible for 255 | // different executor ids to be related semantically. 256 | // NOTE: Source is exposed alongside the resource usage of the 257 | // executor via JSON on the slave. This allows users to import 258 | // usage information into a time series database for monitoring. 
259 | optional string source = 10; 260 | optional bytes data = 4; 261 | } 262 | 263 | 264 | /** 265 | * Describes a master. This will probably have more fields in the 266 | * future which might be used, for example, to link a framework webui 267 | * to a master webui. 268 | */ 269 | message MasterInfo { 270 | required string id = 1; 271 | required uint32 ip = 2; 272 | required uint32 port = 3 [default = 5050]; 273 | optional string pid = 4; 274 | optional string hostname = 5; 275 | } 276 | 277 | 278 | /** 279 | * Describes a slave. Note that the 'id' field is only available after 280 | * a slave is registered with the master, and is made available here 281 | * to facilitate re-registration. If checkpoint is set, the slave is 282 | * checkpointing its own information and potentially frameworks' 283 | * information (if a framework has checkpointing enabled). 284 | */ 285 | message SlaveInfo { 286 | required string hostname = 1; 287 | optional int32 port = 8 [default = 5051]; 288 | repeated Resource resources = 3; 289 | repeated Attribute attributes = 5; 290 | optional SlaveID id = 6; 291 | optional bool checkpoint = 7 [default = false]; 292 | 293 | // Deprecated! 294 | optional string webui_hostname = 2; 295 | optional int32 webui_port = 4 [default = 8081]; 296 | } 297 | 298 | 299 | /** 300 | * Describes an Attribute or Resource "value". A value is described 301 | * using the standard protocol buffer "union" trick. 302 | */ 303 | message Value { 304 | enum Type { 305 | SCALAR = 0; 306 | RANGES = 1; 307 | SET = 2; 308 | TEXT = 3; 309 | } 310 | 311 | message Scalar { 312 | required double value = 1; 313 | } 314 | 315 | message Range { 316 | required uint64 begin = 1; 317 | required uint64 end = 2; 318 | } 319 | 320 | message Ranges { 321 | repeated Range range = 1; 322 | } 323 | 324 | message Set { 325 | repeated string item = 1; 326 | } 327 | 328 | message Text { 329 | required string value = 1; 330 | } 331 | 332 | required Type type = 1; 333 | optional Scalar scalar = 2; 334 | optional Ranges ranges = 3; 335 | optional Set set = 4; 336 | optional Text text = 5; 337 | } 338 | 339 | 340 | /** 341 | * Describes an attribute that can be set on a machine. For now, 342 | * attributes and resources share the same "value" type, but this may 343 | * change in the future and attributes may only be string based. 344 | */ 345 | message Attribute { 346 | required string name = 1; 347 | required Value.Type type = 2; 348 | optional Value.Scalar scalar = 3; 349 | optional Value.Ranges ranges = 4; 350 | optional Value.Set set = 6; 351 | optional Value.Text text = 5; 352 | } 353 | 354 | 355 | /** 356 | * Describes a resource on a machine. A resource can take on one of 357 | * three types: scalar (double), a list of finite and discrete ranges 358 | * (e.g., [1-10, 20-30]), or a set of items. A resource is described 359 | * using the standard protocol buffer "union" trick. 360 | * 361 | * TODO(benh): Add better support for "expected" resources (e.g., 362 | * cpus, memory, disk, network). 363 | */ 364 | message Resource { 365 | required string name = 1; 366 | required Value.Type type = 2; 367 | optional Value.Scalar scalar = 3; 368 | optional Value.Ranges ranges = 4; 369 | optional Value.Set set = 5; 370 | optional string role = 6 [default = "*"]; 371 | } 372 | 373 | 374 | /* 375 | * A snapshot of resource usage statistics. 376 | */ 377 | message ResourceStatistics { 378 | required double timestamp = 1; // Snapshot time, in seconds since the Epoch. 
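// (Added cross-reference: of the optional statistics below, Deimos's usage()
// currently fills in only cpus_limit, mem_limit_bytes and mem_rss_bytes,
// read from the container's cgroups -- see deimos/containerizer/docker.py.)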
379 | 380 | // CPU Usage Information: 381 | // Total CPU time spent in user mode, and kernel mode. 382 | optional double cpus_user_time_secs = 2; 383 | optional double cpus_system_time_secs = 3; 384 | 385 | // Number of CPUs allocated. 386 | optional double cpus_limit = 4; 387 | 388 | // cpu.stat on process throttling (for contention issues). 389 | optional uint32 cpus_nr_periods = 7; 390 | optional uint32 cpus_nr_throttled = 8; 391 | optional double cpus_throttled_time_secs = 9; 392 | 393 | // Memory Usage Information: 394 | optional uint64 mem_rss_bytes = 5; // Resident Set Size. 395 | 396 | // Amount of memory resources allocated. 397 | optional uint64 mem_limit_bytes = 6; 398 | 399 | // Broken out memory usage information (files, anonymous, and mmaped files) 400 | optional uint64 mem_file_bytes = 10; 401 | optional uint64 mem_anon_bytes = 11; 402 | optional uint64 mem_mapped_file_bytes = 12; 403 | 404 | // TODO(bmahler): Add disk usage. 405 | // TODO(bmahler): Add network usage? 406 | } 407 | 408 | 409 | /** 410 | * Describes a snapshot of the resource usage for an executor. 411 | * 412 | * TODO(bmahler): Note that we want to be sending this information 413 | * to the master, and subsequently to the relevant scheduler. So 414 | * this proto is designed to be easy for the scheduler to use, this 415 | * is why we provide the slave id, executor info / task info. 416 | */ 417 | message ResourceUsage { 418 | required SlaveID slave_id = 1; 419 | required FrameworkID framework_id = 2; 420 | 421 | // Resource usage is for an executor. For tasks launched with 422 | // an explicit executor, the executor id is provided. For tasks 423 | // launched without an executor, our internal executor will be 424 | // used. In this case, we provide the task id here instead, in 425 | // order to make this message easier for schedulers to work with. 426 | 427 | optional ExecutorID executor_id = 3; // If present, this executor was 428 | optional string executor_name = 4; // explicitly specified. 429 | 430 | optional TaskID task_id = 5; // If present, the task did not have an executor. 431 | 432 | // If missing, the isolation module cannot provide resource usage. 433 | optional ResourceStatistics statistics = 6; 434 | } 435 | 436 | 437 | /** 438 | * Describes a request for resources that can be used by a framework 439 | * to proactively influence the allocator. If 'slave_id' is provided 440 | * then this request is assumed to only apply to resources on that 441 | * slave. 442 | */ 443 | message Request { 444 | optional SlaveID slave_id = 1; 445 | repeated Resource resources = 2; 446 | } 447 | 448 | 449 | /** 450 | * Describes some resources available on a slave. An offer only 451 | * contains resources from a single slave. 452 | */ 453 | message Offer { 454 | required OfferID id = 1; 455 | required FrameworkID framework_id = 2; 456 | required SlaveID slave_id = 3; 457 | required string hostname = 4; 458 | repeated Resource resources = 5; 459 | repeated Attribute attributes = 7; 460 | repeated ExecutorID executor_ids = 6; 461 | } 462 | 463 | 464 | /** 465 | * Describes a task. Passed from the scheduler all the way to an 466 | * executor (see SchedulerDriver::launchTasks and 467 | * Executor::launchTask). Either ExecutorInfo or CommandInfo should be set. 468 | * A different executor can be used to launch this task, and subsequent tasks 469 | * meant for the same executor can reuse the same ExecutorInfo struct. 
470 | */ 471 | message TaskInfo { 472 | required string name = 1; 473 | required TaskID task_id = 2; 474 | required SlaveID slave_id = 3; 475 | repeated Resource resources = 4; 476 | optional ExecutorInfo executor = 5; 477 | optional CommandInfo command = 7; 478 | optional bytes data = 6; 479 | } 480 | 481 | 482 | /** 483 | * Describes possible task states. IMPORTANT: Mesos assumes tasks that 484 | * enter terminal states (see below) imply the task is no longer 485 | * running and thus clean up any thing associated with the task 486 | * (ultimately offering any resources being consumed by that task to 487 | * another task). 488 | */ 489 | enum TaskState { 490 | TASK_STAGING = 6; // Initial state. Framework status updates should not use. 491 | TASK_STARTING = 0; 492 | TASK_RUNNING = 1; 493 | TASK_FINISHED = 2; // TERMINAL. 494 | TASK_FAILED = 3; // TERMINAL. 495 | TASK_KILLED = 4; // TERMINAL. 496 | TASK_LOST = 5; // TERMINAL. 497 | } 498 | 499 | 500 | /** 501 | * Describes the current status of a task. 502 | */ 503 | message TaskStatus { 504 | required TaskID task_id = 1; 505 | required TaskState state = 2; 506 | optional string message = 4; // Possible message explaining state. 507 | optional bytes data = 3; 508 | optional SlaveID slave_id = 5; 509 | optional double timestamp = 6; 510 | } 511 | 512 | 513 | /** 514 | * Describes possible filters that can be applied to unused resources 515 | * (see SchedulerDriver::launchTasks) to influence the allocator. 516 | */ 517 | message Filters { 518 | // Time to consider unused resources refused. Note that all unused 519 | // resources will be considered refused and use the default value 520 | // (below) regardless of whether Filters was passed to 521 | // SchedulerDriver::launchTasks. You MUST pass Filters with this 522 | // field set to change this behavior (i.e., get another offer which 523 | // includes unused resources sooner or later than the default). 524 | optional double refuse_seconds = 1 [default = 5.0]; 525 | } 526 | 527 | 528 | /** 529 | * Describes a collection of environment variables. This is used with 530 | * CommandInfo in order to set environment variables before running a 531 | * command. 532 | */ 533 | message Environment { 534 | message Variable { 535 | required string name = 1; 536 | required string value = 2; 537 | } 538 | 539 | repeated Variable variables = 1; 540 | } 541 | 542 | 543 | /** 544 | * A generic (key, value) pair used in various places for parameters. 545 | */ 546 | message Parameter { 547 | required string key = 1; 548 | required string value = 2; 549 | } 550 | 551 | 552 | /** 553 | * Collection of Parameter. 554 | */ 555 | message Parameters { 556 | repeated Parameter parameter = 1; 557 | } 558 | 559 | 560 | /** 561 | * Credential used for authentication. 562 | * 563 | * NOTE: The 'principal' is used for authenticating the framework or slave 564 | * with the master. This is different from 'FrameworkInfo.user' 565 | * which is used to determine the user under which the framework's 566 | * executors/tasks are run. 567 | */ 568 | message Credential { 569 | required string principal = 1; 570 | optional bytes secret = 2; 571 | } 572 | 573 | 574 | /** 575 | * ACLs used for authorization. 576 | */ 577 | message ACL { 578 | 579 | // Entity is used to describe a subject(s) or an object(s) of an ACL. 580 | // NOTE: 581 | // To allow everyone access to an Entity set its type to 'ANY'. 582 | // To deny access to an Entity set its type to 'NONE'. 
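// (Added, illustrative example: in protobuf text format an entity naming two
// principals would read type: SOME values: "alice" values: "bob", while
// type: ANY matches everyone and type: NONE matches no one.)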
583 | message Entity { 584 | enum Type { 585 | SOME = 0; 586 | ANY = 1; 587 | NONE = 2; 588 | } 589 | optional Type type = 1 [default = SOME]; 590 | repeated string values = 2; // Ignored for ANY/NONE. 591 | } 592 | 593 | // ACLs. 594 | message RunTasks { 595 | // Subjects. 596 | required Entity principals = 1; // Framework principals. 597 | 598 | // Objects. 599 | required Entity users = 2; // Users to run the tasks/executors as. 600 | } 601 | 602 | message ReceiveOffers { 603 | // Subjects. 604 | required Entity principals = 1; // Framework principals. 605 | 606 | // Objects. 607 | required Entity roles = 2; // Resource roles that can be offered. 608 | } 609 | 610 | message HTTPGet { 611 | // Subjects (At least one of these should be set). 612 | optional Entity usernames = 1; // HTTP authentication based usernames. 613 | optional Entity ips = 2; 614 | optional Entity hostnames = 3; 615 | 616 | // Objects. 617 | required Entity urls = 4; 618 | } 619 | 620 | message HTTPPut { 621 | // Subjects (At least one of these should be set). 622 | optional Entity usernames = 1; // HTTP authentication based usernames. 623 | optional Entity ips = 2; 624 | optional Entity hostnames = 3; 625 | 626 | // Objects. 627 | required Entity urls = 4; 628 | } 629 | } 630 | 631 | 632 | /* 633 | * Collection of ACL. 634 | * 635 | * Each authorization request is evaluated against the ACLs in the order 636 | * they are defined. 637 | * 638 | * For simplicity, the ACLs for a given action are not aggregated even 639 | * when they have the same subjects or objects. The first ACL that 640 | * matches the request determines whether that request should be 641 | * permitted or not. An ACL matches iff both the subjects 642 | * (e.g., clients, principals) and the objects (e.g., urls, users, 643 | * roles) of the ACL match the request. 644 | * 645 | * If none of the ACLs match the request, the 'permissive' field 646 | * determines whether the request should be permitted or not. 647 | * 648 | * TODO(vinod): Do aggregation of ACLs when possible. 649 | * 650 | */ 651 | message ACLs { 652 | optional bool permissive = 1 [default = true]; 653 | repeated ACL.RunTasks run_tasks = 2; 654 | repeated ACL.ReceiveOffers receive_offers = 3; 655 | repeated ACL.HTTPGet http_get = 4; 656 | repeated ACL.HTTPPut http_put = 5; 657 | } 658 | --------------------------------------------------------------------------------
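A short, hedged sketch (not part of the repository; the helper names are invented for illustration): the Termination message generated above in deimos/containerizer_pb2.py is what Deimos's wait() writes back to the slave, with the container's exit code packed into the upper byte of 'status' in the style of POSIX wait(2) -- see the (exit << 8) expression in deimos/containerizer/docker.py. Round-tripping that convention with nothing but the generated module looks roughly like this:

    # Illustrative only -- mirrors the status convention used by wait() in docker.py.
    from deimos.containerizer_pb2 import Termination

    def pack_termination(exit_code, killed=False, message=""):
        t = Termination()
        t.killed = killed
        t.message = message
        t.status = exit_code << 8       # exit code in the high byte, wait(2)-style
        return t.SerializeToString()

    def unpack_exit_code(data):
        t = Termination()
        t.ParseFromString(data)
        return (t.status >> 8) & 0xff   # recover the exit code

    assert unpack_exit_code(pack_termination(64)) == 64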