├── .gitignore ├── README.md ├── bin ├── build-executor ├── executor ├── framework └── setup ├── executor.py ├── framework.py └── requirements.pip /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | var 14 | sdist 15 | develop-eggs 16 | .installed.cfg 17 | lib 18 | lib64 19 | __pycache__ 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | bin/environment.sh 37 | bin/env 38 | .DS_Store 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | mesos-python-framework 3 | ====================== 4 | 5 | Example Mesos Framework written in Python. The goal of this framework is for testing mesos deployments to ensure they're behaving correctly. It also serves as a solid, simple example of a Python Mesos Framework. 6 | -------------------------------------------------------------------------------- /bin/build-executor: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Script for building the mesos-py-test executor tar file. This script will generate 5 | # a file in ../dist/mesos-py-test-{git hash}.tar.gz to be uploaded to somewhere accessible 6 | # from your mesos cluster. 7 | # 8 | # If you have uncommitted changes, the script will do nothing with the FORCE 9 | # environment variable set. 10 | # 11 | 12 | set -e 13 | 14 | cd $(dirname $(dirname $0)) 15 | 16 | # Help the user out by warning them of uncommitted changes 17 | if [ -n "$(git status --porcelain)" ]; then 18 | if [ ! "$FORCE" ]; then 19 | echo "You appear to have uncommitted changes, use FORCE=1 to proceed." 20 | exit 1 21 | fi 22 | fi 23 | 24 | # Figure out the git revision 25 | GIT_REV=$(git rev-parse HEAD) 26 | SOURCE_DIR=$(pwd) 27 | TAR_NAME="mesos-py-test-$GIT_REV" 28 | TMP_DIR=$(mktemp -d -t XXXXXX) 29 | 30 | echo "Using temporary directory $TMP_DIR" 31 | 32 | # Create a copy of the repository 33 | mkdir -p dist 34 | pushd dist 35 | pushd $TMP_DIR 36 | mkdir build 37 | cp -r $SOURCE_DIR/** build/ 38 | 39 | pushd build 40 | rm -rf *.pyc 41 | rm -rf bin/env 42 | rm -rf dist 43 | popd 44 | 45 | mv build $TAR_NAME 46 | tar -cvf $TAR_NAME.tar.gz $TAR_NAME 47 | popd 48 | 49 | mv $TMP_DIR/$TAR_NAME.tar.gz ./ 50 | 51 | echo "Cleaning up temporary directory" 52 | rm -rf $TMP_DIR 53 | popd 54 | -------------------------------------------------------------------------------- /bin/executor: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | cd $(dirname $(dirname "$0")) 6 | ./bin/setup 1>&2 7 | 8 | source ./bin/env/bin/activate 9 | python2.7 executor.py "$@" 10 | -------------------------------------------------------------------------------- /bin/framework: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | cd $(dirname $(dirname "$0")) 6 | ./bin/setup 1>&2 7 | 8 | source ./bin/env/bin/activate 9 | python2.7 framework.py "$@" 10 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ___ _ __ _ 4 | # / / |__ (_)_ __ / /__ ___| |_ _ _ _ __ 5 | # / /| '_ \| | '_ \ / / __|/ _ \ __| | | | '_ \ 6 | # _ / / | |_) | | | | |/ /\__ \ __/ |_| |_| | |_) | 7 | # (_)_/ |_.__/|_|_| |_/_/ |___/\___|\__|\__,_| .__/ 8 | # |_| 9 | 10 | set -e 11 | set -x 12 | 13 | cd $(dirname "$0") 14 | 15 | function check_deps() { 16 | missing= 17 | while [ $1 ]; do 18 | if [ ! $(which $1) ]; then 19 | echo "Could not find $1." 20 | missing=1 21 | fi 22 | shift 23 | done 24 | if [ $missing ]; then 25 | echo "Not all dependencies found on PATH. Aborting." 26 | exit 1 27 | fi 28 | } 29 | 30 | check_deps python2.7 virtualenv 31 | 32 | export PYTHONPATH= 33 | export CFLAGS=-Qunused-arguments 34 | export CPPFLAGS=-Qunused-arguments 35 | 36 | # Install the requirements 37 | if [ ! -f "./env/updated" -o ../requirements.pip -nt ./env/updated ]; then 38 | virtualenv ./env -p python2.7 39 | ./env/bin/pip install -U distribute==0.6.46 40 | ./env/bin/pip install -r ../requirements.pip 41 | if [ $? -ne 0 ]; then 42 | echo "There was a problem installing requirements. Aborting." 43 | exit 1 44 | fi 45 | 46 | echo Base requirements installed. 47 | touch ./env/updated 48 | fi 49 | -------------------------------------------------------------------------------- /executor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # _ _ 4 | # _____ ____ _ _ __ ___ _ __ | | ___ _____ _____ ___ _ _| |_ ___ _ __ 5 | # / _ \ \/ / _` | '_ ` _ \| '_ \| |/ _ \_____ / _ \ \/ / _ \/ __| | | | __/ _ \| '__| 6 | # | __/> < (_| | | | | | | |_) | | __/_____| __/> < __/ (__| |_| | || (_) | | 7 | # \___/_/\_\__,_|_| |_| |_| .__/|_|\___| \___/_/\_\___|\___|\__,_|\__\___/|_| 8 | # |_| 9 | # 10 | 11 | import logging 12 | import threading 13 | import time 14 | 15 | import pesos.api 16 | import pesos.executor 17 | from pesos.vendor.mesos import mesos_pb2 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class ExampleExecutor(pesos.api.Executor): 23 | 24 | def launch_task(self, driver, task): 25 | 26 | logger.info("HELLO I AM TASK") 27 | 28 | def run_task(): 29 | logger.info("Launching task %s", task.task_id.value) 30 | 31 | update = mesos_pb2.TaskStatus() 32 | update.task_id.value = task.task_id.value 33 | update.state = mesos_pb2.TASK_RUNNING 34 | driver.send_status_update(update) 35 | 36 | time.sleep(15) 37 | 38 | update = mesos_pb2.TaskStatus() 39 | update.task_id.value = task.task_id.value 40 | update.state = mesos_pb2.TASK_FINISHED 41 | 42 | # Send the terminal update 43 | driver.send_status_update(update) 44 | 45 | thread = threading.Thread(target=run_task) 46 | thread.daemon = True 47 | thread.start() 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | for l in ('pesos', 'compactor', 'tornado', '__main__'): 53 | l = logging.getLogger(l) 54 | l.setLevel(logging.DEBUG) 55 | 56 | # Launch the executor driver 57 | driver = pesos.executor.MesosExecutorDriver(ExampleExecutor()) 58 | 59 | status = 0 60 | if driver.run() == mesos_pb2.DRIVER_STOPPED: 61 | status = 1 62 | 63 | driver.stop() 64 | exit(status) 65 | -------------------------------------------------------------------------------- /framework.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # _ __ _ 4 | # _____ ____ _ _ __ ___ _ __ | | ___ / _|_ __ __ _ _ __ ___ _____ _____ _ __| | __ 5 | # / _ \ \/ / _` | '_ ` _ \| '_ \| |/ _ \_____| |_| '__/ _` | '_ ` _ \ / _ \ \ /\ / / _ \| '__| |/ / 6 | # | __/> < (_| | | | | | | |_) | | __/_____| _| | | (_| | | | | | | __/\ V V / (_) | | | < 7 | # \___/_/\_\__,_|_| |_| |_| .__/|_|\___| |_| |_| \__,_|_| |_| |_|\___| \_/\_/ \___/|_| |_|\_\ 8 | # |_| 9 | # 10 | 11 | import Queue 12 | import argparse 13 | import os 14 | import threading 15 | import time 16 | import logging 17 | 18 | 19 | import pesos.api 20 | import pesos.scheduler 21 | from pesos.vendor.mesos import mesos_pb2 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | class ExampleScheduler(pesos.api.Scheduler): 27 | """Example scheduler that launches tasks that don't do a whole lot. 28 | """ 29 | 30 | TASK_CPU = 0.1 31 | TASK_MEM = 256 32 | 33 | def __init__(self, taskQueue): 34 | 35 | # Maintain a queue of the tasks to launch 36 | self.tasks = taskQueue 37 | 38 | self.terminal = 0 39 | self.total_tasks = taskQueue.qsize() 40 | 41 | def registered(self, driver, frameworkId, masterInfo): 42 | """ 43 | Invoked when the scheduler successfully registers with a Mesos 44 | master. A unique ID (generated by the master) used for 45 | distinguishing this framework from others and MasterInfo 46 | with the ip and port of the current master are provided as arguments. 47 | """ 48 | 49 | logger.info("Registered framework %s" % (frameworkId.value)) 50 | 51 | def reregistered(self, driver, masterInfo): 52 | """ 53 | Invoked when the scheduler re-registers with a newly elected Mesos master. 54 | This is only called when the scheduler has previously been registered. 55 | MasterInfo containing the updated information about the elected master 56 | is provided as an argument. 57 | """ 58 | 59 | logger.info("Connected with master %s" % (masterInfo.ip)) 60 | 61 | def disconnected(self, driver): 62 | """ 63 | Invoked when the scheduler becomes "disconnected" from the master 64 | (e.g., the master fails and another is taking over). 65 | """ 66 | 67 | logger.info("Disconnected from master") 68 | 69 | def resource_offers(self, driver, offers): 70 | """ 71 | Invoked when resources have been offered to this framework. A 72 | single offer will only contain resources from a single slave. 73 | 74 | Resources associated with an offer will not be re-offered to 75 | _this_ framework until either (a) this framework has rejected 76 | those resources (see SchedulerDriver::launchTasks) or (b) those 77 | resources have been rescinded (see Scheduler::offerRescinded). 78 | 79 | Note that resources may be concurrently offered to more than one 80 | framework at a time (depending on the allocator being used). In 81 | that case, the first framework to launch tasks using those 82 | resources will be able to use them while the other frameworks 83 | will have those resources rescinded (or if a framework has 84 | already launched tasks with those resources then those tasks will 85 | fail with a TASK_LOST status and a message saying as much). 86 | """ 87 | 88 | logger.info("Received %d offers" % len(offers)) 89 | 90 | def handle_offers(): 91 | declined = [] 92 | 93 | # Loop over the offers and see if there's anything that looks good 94 | for offer in offers: 95 | offer_cpu = 0 96 | offer_mem = 0 97 | 98 | if self.tasks.empty(): 99 | declined.append(offer.id) 100 | continue 101 | 102 | # Collect up the CPU and Memory resources from the offer 103 | for resource in offer.resources: 104 | if resource.name == "cpus": 105 | offer_cpu = resource.scalar.value 106 | if resource.name == "mem": 107 | offer_mem = resource.scalar.value 108 | 109 | tasks = [] 110 | 111 | # Keep looking for tasks until any of the following criteria are met 112 | # - No more CPU left in the offer 113 | # - No more Memory left in the offer 114 | # - No more tasks left to launch 115 | while offer_mem >= self.TASK_MEM and offer_cpu >= self.TASK_CPU \ 116 | and not self.tasks.empty(): \ 117 | 118 | offer_cpu -= self.TASK_CPU 119 | offer_mem -= self.TASK_MEM 120 | 121 | # Pop a task off the queue 122 | executor_id, task_id, args = self.tasks.get() 123 | self.tasks.task_done() # Mark it as done immediately 124 | 125 | logger.info("Queue task %d:%d" % (executor_id, task_id)) 126 | tasks.append(self._build_task(offer, executor_id, task_id, args)) 127 | 128 | # If we have any tasks to launch, ask the driver to launch them. 129 | if tasks: 130 | driver.launch_tasks(offer.id, tasks) 131 | 132 | # Decline the offers in batch 133 | if declined: 134 | driver.decline_offer(declined) 135 | 136 | t = threading.Thread(target=handle_offers) 137 | t.start() 138 | 139 | def _build_task(self, offer, executor_id, task_id, args): 140 | """ 141 | Create a TaskInfo object for an offer, executor_id and task_id. 142 | """ 143 | 144 | # Create the initial TaskInfo object 145 | task = mesos_pb2.TaskInfo() 146 | task.name = "Test Framework Task" 147 | task.task_id.value = "%d:%d" % (executor_id, task_id) 148 | task.slave_id.value = offer.slave_id.value 149 | 150 | # Configure the executor 151 | task.executor.executor_id.value = str(executor_id) 152 | task.executor.framework_id.value = offer.framework_id.value 153 | 154 | uri = task.executor.command.uris.add() 155 | uri.value = args.executor_uri 156 | 157 | task.executor.command.value = "./%s/bin/executor" % os.path.basename(uri.value).split(".")[0] 158 | 159 | # Add the task resource 160 | cpus = task.resources.add() 161 | cpus.name = "cpus" 162 | cpus.type = mesos_pb2.Value.SCALAR 163 | cpus.scalar.value = self.TASK_CPU 164 | 165 | mem = task.resources.add() 166 | mem.name = "mem" 167 | mem.type = mesos_pb2.Value.SCALAR 168 | mem.scalar.value = self.TASK_MEM 169 | 170 | return task 171 | 172 | def offer_rescinded(self, driver, offerId): 173 | """ 174 | Invoked when an offer is no longer valid (e.g., the slave was 175 | lost or another framework used resources in the offer). If for 176 | whatever reason an offer is never rescinded (e.g., dropped 177 | message, failing over framework, etc.), a framework that attempts 178 | to launch tasks using an invalid offer will receive TASK_LOST 179 | status updates for those tasks (see Scheduler::resourceOffers). 180 | """ 181 | 182 | logger.info("Offer rescinded %s" % (offerId.value)) 183 | 184 | def status_update(self, driver, taskStatus): 185 | """ 186 | Invoked when the status of a task has changed (e.g., a slave is 187 | lost and so the task is lost, a task finishes and an executor 188 | sends a status update saying so, etc). Note that returning from 189 | this callback _acknowledges_ receipt of this status update! If 190 | for whatever reason the scheduler aborts during this callback (or 191 | the process exits) another status update will be delivered (note, 192 | however, that this is currently not true if the slave sending the 193 | status update is lost/fails during that time). 194 | """ 195 | 196 | statuses = { 197 | mesos_pb2.TASK_STAGING: "STAGING", 198 | mesos_pb2.TASK_STARTING: "STARTING", 199 | mesos_pb2.TASK_RUNNING: "RUNNING", 200 | mesos_pb2.TASK_FINISHED: "FINISHED", 201 | mesos_pb2.TASK_FAILED: "FAILED", 202 | mesos_pb2.TASK_KILLED: "KILLED", 203 | mesos_pb2.TASK_LOST: "LOST", 204 | } 205 | 206 | logger.info("Received status update for task %s (%s)" % ( 207 | taskStatus.task_id.value, 208 | statuses[taskStatus.state] 209 | )) 210 | 211 | if taskStatus.state == mesos_pb2.TASK_FINISHED or \ 212 | taskStatus.state == mesos_pb2.TASK_FAILED or \ 213 | taskStatus.state == mesos_pb2.TASK_KILLED or \ 214 | taskStatus.state == mesos_pb2.TASK_LOST: \ 215 | 216 | # Mark this task as terminal 217 | self.terminal += 1 218 | 219 | if self.terminal == self.total_tasks: 220 | driver.stop() 221 | 222 | def framework_message(self, driver, executorId, slaveId, data): 223 | """ 224 | Invoked when an executor sends a message. These messages are best 225 | effort; do not expect a framework message to be retransmitted in 226 | any reliable fashion. 227 | """ 228 | 229 | logger.info("Message from executor %s and slave %s: %s" % ( 230 | executorId.value, 231 | slaveId.value, 232 | data 233 | )) 234 | 235 | def slave_lost(self, driver, slaveId): 236 | """ 237 | Invoked when a slave has been determined unreachable (e.g., 238 | machine failure, network partition). Most frameworks will need to 239 | reschedule any tasks launched on this slave on a new slave. 240 | """ 241 | 242 | logger.info("Slave %s has been lost. Y U DO DIS." % (slaveId.value)) 243 | 244 | def executor_lost(self, driver, executorId, slaveId, exitCode): 245 | """ 246 | Invoked when an executor has exited/terminated. Note that any 247 | tasks running will have TASK_LOST status updates automagically 248 | generated. 249 | """ 250 | 251 | logger.info("Executor %s has been lost on slave %s with exit code %d" % ( 252 | executorId.value, 253 | slaveId.value, 254 | exitCode 255 | )) 256 | 257 | def error(self, driver, message): 258 | """ 259 | Invoked when there is an unrecoverable error in the scheduler or 260 | scheduler driver. The driver will be aborted BEFORE invoking this 261 | callback. 262 | """ 263 | 264 | logger.info("There was an error: %s" % (message)) 265 | 266 | 267 | if __name__ == "__main__": 268 | 269 | for l in ('pesos', 'compactor', 'tornado', '__main__'): 270 | l = logging.getLogger(l) 271 | l.setLevel(logging.DEBUG) 272 | 273 | parser = argparse.ArgumentParser(prog="docker-launcher") 274 | parser.add_argument("-m", "--master", required=True, type=str, 275 | help="IP/Port of mesos master") 276 | parser.add_argument("--num-tasks", default=1, type=int, 277 | help="Number of tasks to launch per executor (default: 1)") 278 | parser.add_argument("--num-executors", default=1, type=int, 279 | help="Number of executors to launch (default: 1)") 280 | parser.add_argument("--executor-uri", required=True, type=str, 281 | help="URL to download a version of this code.") 282 | 283 | args = parser.parse_args() 284 | 285 | # Setup the loggers 286 | loggers = (__name__, "tornado", "pesos", "compactor") 287 | for log in loggers: 288 | logging.getLogger(log).setLevel(logging.DEBUG) 289 | 290 | # Create the queue of tasks 291 | tasks = Queue.Queue() 292 | for task in xrange(args.num_tasks): 293 | for executor in xrange(args.num_executors): 294 | tasks.put((executor, task, args)) 295 | 296 | # Launch the mesos framework 297 | framework = mesos_pb2.FrameworkInfo() 298 | framework.name = "Test Python Framework" 299 | framework.user = "root" 300 | 301 | driver = pesos.scheduler.MesosSchedulerDriver( 302 | ExampleScheduler(tasks), 303 | framework, 304 | args.master 305 | ) 306 | 307 | t = threading.Thread(target=driver.run) 308 | t.setDaemon(True) 309 | t.start() 310 | 311 | while t.isAlive(): 312 | time.sleep(0.5) 313 | -------------------------------------------------------------------------------- /requirements.pip: -------------------------------------------------------------------------------- 1 | git+git://github.com/duedil-ltd/tornado.git@1ceefdd89be8c3df217d79e0faa2d1948a231646#egg=tornado 2 | git+git://github.com/wickman/compactor.git@0a16c6f8af55171d46cc03d4abfa4e3d5ef8ea38#egg=compactor 3 | git+git://github.com/tarnfeld/pesos.git@33515da517bc1222a58de9fdfb0cbe614a469c0c#egg=pesos 4 | trollius==0.4 5 | --------------------------------------------------------------------------------