├── .dockerignore
├── .env.example
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── docker-compose.execution.yml
├── docker-compose.networking.yml
├── docker-services
│   ├── flower
│   │   ├── Dockerfile
│   │   └── requirements.txt
│   └── rabbitmq
│       ├── Dockerfile
│       ├── config_rabbit.sh
│       ├── enabled_plugins
│       ├── init.sh
│       └── rabbitmq.config
├── docker-setup-execution.sh
├── docker-start-master.sh
├── docker-start-worker.sh
├── entrypoint-master.sh
├── entrypoint-worker.sh
├── env-set-token.sh
├── get-module-names.sh
├── modules.csv.example
├── requirements.txt
├── src
│   ├── api.py
│   ├── constants.py
│   ├── mlsploit.py
│   └── utils
│       └── __init__.py
└── wait-for-rabbitmq.py
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
.venv
Pipfile
Pipfile.lock

*__pycache__
*.pyc

.scratch
docker-services

celerybeat-schedule
celerybeat-schedule.db
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
MLSPLOIT_API_ADMIN_TOKEN=

MLSPLOIT_API_BASE_URL=http://api-host/api/v1
MLSPLOIT_BACKEND_URL=redis://redis-host:6379
MLSPLOIT_BROKER_URL=amqp://admin:password@rabbitmq-host:5672/mlsploit

MLSPLOIT_EXECUTION_JOB_CONCURRENCY=5
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store

*.pyc
*__pycache__
.mypy_cache/

.idea/

# Local scratch directory
.scratch

# Environments
.env
.venv
Pipfile
Pipfile.lock

celerybeat-schedule
celerybeat-schedule.db

# Modules file
/modules.csv
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM docker:stable

VOLUME /app

RUN apk upgrade --no-cache \
    && apk add --no-cache --update git python3 py-pip

ENV PYTHONUNBUFFERED=1

COPY requirements.txt /
RUN pip3 install --upgrade pip \
    && pip3 install -r /requirements.txt \
    && rm -rf /root/.cache/pip/wheels/*

ARG STAGE
ARG BASE_DIR
ENV CONTAINER_BUILD_STAGE=$STAGE
ENV MLSPLOIT_DOCKER_HOST_BASE_DIR=$BASE_DIR

WORKDIR /app

ENTRYPOINT sh entrypoint-${CONTAINER_BUILD_STAGE}.sh
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2019, Nilaksh Das
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Execution Orchestrator for MLsploit

## ENVIRONMENT SETUP

The primary configuration settings of the execution module are stored
in the `.env` file, which the setup scripts create from `.env.example`
(you can also copy it manually). Modify it according to your needs.

The first thing you should update is `MLSPLOIT_API_ADMIN_TOKEN`,
which is required for accessing the MLsploit data API
(the `env-set-token.sh` helper script can set it for you).

## RUN WITH DOCKER

We use `docker-compose` to orchestrate the setup and execution of the service.
You only need to set up `docker` and `docker-compose` on your system
and then run the following command:

```bash
$ bash docker-start-master.sh
```

This starts the execution service in *MASTER* mode, which runs the
scheduling, networking, monitoring and execution of jobs.
The *WORKER* mode, which only handles job execution,
can be run in parallel on another system using the following command:

```bash
$ bash docker-start-worker.sh
```

For the *WORKER* mode, you will need to update the host environment variables
in the `.env` file (`MLSPLOIT_API_BASE_URL`, `MLSPLOIT_BACKEND_URL` and
`MLSPLOIT_BROKER_URL`) so that they point to the *MASTER* node.

## MANUAL SETUP

### Install the dependencies

```bash
$ pip install -r requirements.txt
```


### Set up the environment

```bash
$ export MLSPLOIT_BROKER_URL='amqp://admin:password@localhost:5672/mlsploit'
$ export MLSPLOIT_BACKEND_URL='redis://localhost:6379'
$ export MLSPLOIT_API_ADMIN_TOKEN=
```

## USAGE

```bash
$ celery worker -A mlsploit -B \
    -l info \
    -Ofair \
    -c 5
```

The `-B` flag starts the beat scheduler that fetches and queues pending jobs,
so it should be enabled on only one worker node.
--------------------------------------------------------------------------------
/docker-compose.execution.yml:
--------------------------------------------------------------------------------
version: '3'

services:
  mlsploit-execution-service:
    build:
      context: .
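      # Note: BASE_DIR captures the host path of this checkout ($PWD when the
      # docker-start scripts invoke docker-compose). The service mounts
      # /var/run/docker.sock and launches module containers as siblings on
      # the host daemon, so the bind-mount paths it passes to Docker (see
      # SCRATCH_DIR_DOCKER in src/constants.py) must be host paths rather
      # than paths inside this container.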
7 | args: 8 | - BASE_DIR=$PWD 9 | network_mode: host 10 | extra_hosts: 11 | - 'api-host:127.0.0.1' 12 | - 'redis-host:127.0.0.1' 13 | - 'rabbitmq-host:127.0.0.1' 14 | volumes: 15 | - /var/run/docker.sock:/var/run/docker.sock 16 | - .:/app 17 | env_file: 18 | - .env 19 | environment: 20 | RUNNING_IN_DOCKER: 'true' 21 | -------------------------------------------------------------------------------- /docker-compose.networking.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | mlsploit-networking-backend-service: 5 | image: redis:latest 6 | ports: 7 | - 6379:6379 8 | 9 | mlsploit-networking-broker-service: 10 | build: ./docker-services/rabbitmq 11 | ports: 12 | - 5672:5672 13 | - 15672:15672 14 | -------------------------------------------------------------------------------- /docker-services/flower/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | 3 | COPY requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | ENTRYPOINT ["flower"] 7 | CMD ["--port=5555", \ 8 | "--broker=amqp://admin:password@rabbitmq-service:5672/", \ 9 | "--broker_api=http://admin:password@rabbitmq-service:15672/api/"] 10 | -------------------------------------------------------------------------------- /docker-services/flower/requirements.txt: -------------------------------------------------------------------------------- 1 | flower==0.9.3 2 | tornado==6.0.4 3 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rabbitmq:latest 2 | 3 | EXPOSE 5672 4 | EXPOSE 15672 5 | 6 | COPY enabled_plugins /etc/rabbitmq/ 7 | COPY rabbitmq.config /etc/rabbitmq/ 8 | 9 | COPY init.sh / 10 | COPY config_rabbit.sh / 11 | 12 | RUN chmod 777 /init.sh /config_rabbit.sh 13 | 14 | CMD ["/init.sh"] 15 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/config_rabbit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script needs to be executed just once 4 | if [ -f /$0.completed ] ; then 5 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] /$0.completed found, skipping run" 6 | exit 0 7 | fi 8 | 9 | # Wait for RabbitMQ startup 10 | for (( ; ; )) ; do 11 | sleep 2 12 | rabbitmqctl -q node_health_check > /dev/null 2>&1 13 | if [ $? -eq 0 ] ; then 14 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] rabbitmq is now running" 15 | break 16 | else 17 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] waiting for rabbitmq startup" 18 | fi 19 | done 20 | 21 | # Execute RabbitMQ config commands here 22 | 23 | # Add vhosts 24 | rabbitmqctl add_vhost mlsploit 25 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] vhosts created" 26 | 27 | # Set permissions for vhosts 28 | rabbitmqctl set_permissions -p mlsploit admin ".*" ".*" ".*" 29 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] permissions set for vhosts" 30 | 31 | # Create mark so script is not run again 32 | touch /$0.completed 33 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/enabled_plugins: -------------------------------------------------------------------------------- 1 | [rabbitmq_management, rabbitmq_management_visualiser]. 
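%% Note: the rabbitmq_management plugin enabled above serves an HTTP API and
%% web UI on port 15672, which docker-compose.networking.yml exposes and the
%% bundled Flower service is configured to query. With the default
%% credentials from rabbitmq.config, a quick liveness check could look like:
%%   curl -u admin:password http://localhost:15672/api/overview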
--------------------------------------------------------------------------------
/docker-services/rabbitmq/init.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/config_rabbit.sh & rabbitmq-server "$@"
--------------------------------------------------------------------------------
/docker-services/rabbitmq/rabbitmq.config:
--------------------------------------------------------------------------------
[
  {rabbit,
   [
    %% The default "guest" user is only permitted to access the server
    %% via a loopback interface (e.g. localhost).
    %% {loopback_users, [<<"guest">>]},
    %%
    %% The following line allows access to the guest user from
    %% anywhere on the network.
    {loopback_users, ["guest"]},
    {default_vhost, "/"},
    {default_user, "admin"},
    {default_pass, "password"},
    {default_permissions, [".*", ".*", ".*"]}
   ]}
].
--------------------------------------------------------------------------------
/docker-setup-execution.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

if [[ ! -f .env ]]; then
    cp .env.example .env
fi

MASTER_DOCKER_PROJECT_NAME=mlsploit-execution-backend-master
WORKER_DOCKER_PROJECT_NAME=mlsploit-execution-backend-worker

docker-compose \
    -p ${MASTER_DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    -f docker-compose.networking.yml \
    build \
    --build-arg STAGE=master

docker-compose \
    -p ${WORKER_DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    build \
    --build-arg STAGE=worker
--------------------------------------------------------------------------------
/docker-start-master.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

DOCKER_PROJECT_NAME=mlsploit-execution-backend-master

docker-compose \
    -p ${DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    -f docker-compose.networking.yml \
    up
--------------------------------------------------------------------------------
/docker-start-worker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

DOCKER_PROJECT_NAME=mlsploit-execution-backend-worker

docker-compose \
    -p ${DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    up
--------------------------------------------------------------------------------
/entrypoint-master.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

set -a && . .env && set +a

if [ -z "${MLSPLOIT_API_ADMIN_TOKEN}" ]
then
    echo "[ERROR] MLSPLOIT_API_ADMIN_TOKEN is not set"
    exit 1
fi

./wait-for-rabbitmq.py || exit 1

cd ./src

CELERY_ID=mlsploit.master@%h

celery worker -A mlsploit -B \
    -l info \
    -Ofair \
    -n ${CELERY_ID} \
    -Q housekeeping \
    -c ${MLSPLOIT_EXECUTION_JOB_CONCURRENCY}
--------------------------------------------------------------------------------
/entrypoint-worker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

set -a && . .env && set +a

if [ -z "${MLSPLOIT_API_ADMIN_TOKEN}" ]
then
    echo "[ERROR] MLSPLOIT_API_ADMIN_TOKEN is not set"
    exit 1
fi

export MLSPLOIT_MODULES=$(./get-module-names.sh)

./wait-for-rabbitmq.py || exit 1

cd ./src

DATE_STR=$(date +%s)
RAND_STR=$(python3 -c "from coolname import generate_slug; print(generate_slug(2))")
CELERY_ID=mlsploit.worker.${DATE_STR}.${RAND_STR}@%h

celery worker -A mlsploit \
    -Ofair \
    -l info \
    -n ${CELERY_ID} \
    -Q ${MLSPLOIT_MODULES:-celery} \
    -c ${MLSPLOIT_EXECUTION_JOB_CONCURRENCY}
--------------------------------------------------------------------------------
/env-set-token.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

if [[ ! -f .env ]]; then
    cp .env.example .env
fi

function usage() {
    echo "usage: bash env-set-token.sh <token>"
}

function update_env() {
    KEY="$1"; VAL="$2"; LINE_NUM=$(grep -nm 1 "^${KEY}=" .env | cut -f1 -d:)

    # Append the key if it is not present yet, instead of letting sed
    # run with an empty line address and clobber every line of .env
    if [[ -z ${LINE_NUM} ]]; then
        echo "${KEY}=${VAL}" >> .env
        return
    fi

    (sed "${LINE_NUM}s/.*/${KEY}=${VAL}/" .env > .env.tmp) && mv .env.tmp .env
}

if [[ -z $1 ]]; then
    usage
    exit 1
fi

MLSPLOIT_API_ADMIN_TOKEN="$1"
update_env MLSPLOIT_API_ADMIN_TOKEN "$MLSPLOIT_API_ADMIN_TOKEN"
--------------------------------------------------------------------------------
/get-module-names.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

MODULES=""
OLD_IFS=$IFS
if [ -f modules.csv ]; then
    while IFS=, read -r NAME REPO BRANCH; do
        MODULES="$MODULES,$NAME"
    done < modules.csv
fi
IFS=$OLD_IFS
echo "${MODULES#,}"
--------------------------------------------------------------------------------
/modules.csv.example:
--------------------------------------------------------------------------------
foolbox,https://github.com/mlsploit/foolbox-mlsploit.git,master
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cachetools==4.0.0
celery==4.4.1
coolname==1.1.0
docker==4.2.0
gitpython==3.1.0
pika==1.1.0
redis==3.4.1
requests==2.23.0
tornado==6.0.4
--------------------------------------------------------------------------------
/src/api.py:
--------------------------------------------------------------------------------
import json
import os
from urllib.parse import urlparse, ParseResult

from cachetools import TTLCache
import requests


API_BASE_URL = os.getenv("MLSPLOIT_API_BASE_URL")
assert API_BASE_URL, """
MLSPLOIT_API_BASE_URL environment variable not found. 12 | $ export MLSPLOIT_API_BASE_URL='http://localhost:8000/api/v1' 13 | """ 14 | 15 | 16 | class RestClient: 17 | _token = None 18 | _cache = TTLCache(maxsize=1000, ttl=2) 19 | 20 | @classmethod 21 | def _make_auth_header(cls): 22 | headers = dict() 23 | if cls._token is not None: 24 | headers["Authorization"] = "Token %s" % cls._token 25 | return headers 26 | 27 | @classmethod 28 | def set_token(cls, token): 29 | cls._token = token 30 | 31 | @classmethod 32 | def get(cls, url, params=None, headers=None): 33 | params = params or dict() 34 | headers = headers or dict() 35 | headers.update(cls._make_auth_header()) 36 | 37 | key = (url, tuple(params.items()), tuple(headers.items())) 38 | 39 | if key not in cls._cache: 40 | r = requests.get(url, params=params, headers=headers) 41 | 42 | cls._cache[key] = r.text 43 | data = json.loads(cls._cache[key]) 44 | 45 | return data 46 | 47 | @classmethod 48 | def post(cls, url, payload, files=None, headers=None): 49 | cls._cache.clear() 50 | 51 | headers = headers or dict() 52 | headers.update(cls._make_auth_header()) 53 | 54 | url += "/" if not url.endswith("/") else "" 55 | 56 | r = requests.post(url, data=payload, files=files, headers=headers) 57 | 58 | data = json.loads(r.text) 59 | return data 60 | 61 | @classmethod 62 | def patch(cls, url, payload, files=None, headers=None): 63 | cls._cache.clear() 64 | 65 | headers = headers or dict() 66 | headers.update(cls._make_auth_header()) 67 | 68 | url += "/" if not url.endswith("/") else "" 69 | 70 | r = requests.patch(url, data=payload, files=files, headers=headers) 71 | 72 | data = json.loads(r.text) 73 | return data 74 | 75 | @staticmethod 76 | def make_path(*args): 77 | p = "/".join(map(lambda x: x.strip("/"), args)) 78 | return p 79 | 80 | 81 | class ApiDataModel(object): 82 | _endpoint = None 83 | _expandable = dict() 84 | _json_props = list() 85 | 86 | def __init__(self, url): 87 | endpoint = urlparse(self._endpoint) 88 | endpoint = ParseResult("", *endpoint[1:]).geturl() 89 | assert endpoint in url 90 | super(ApiDataModel, self).__setattr__("_url", url) 91 | 92 | def __getattr__(self, item): 93 | data = RestClient.get(self._url) 94 | 95 | val = data[item] 96 | if item in self._expandable: 97 | if type(self._expandable[item]) is list: 98 | klass = self._expandable[item][0] 99 | val = list(klass(v) for v in val) 100 | 101 | elif val is not None: 102 | klass = self._expandable[item] 103 | val = klass(val) 104 | 105 | elif item in self._json_props: 106 | val = json.loads(val) 107 | 108 | return val 109 | 110 | def __setattr__(self, key, value): 111 | if key in self._json_props: 112 | value = json.dumps(value) 113 | 114 | RestClient.patch(self.url, {key: value}) 115 | 116 | def __repr__(self): 117 | return self._url 118 | 119 | @property 120 | def url(self): 121 | return self._url 122 | 123 | @classmethod 124 | def from_id(cls, id_): 125 | url = RestClient.make_path(cls._endpoint, str(id_)) + "/" 126 | return cls(url) 127 | 128 | @classmethod 129 | def create(cls, **kwargs): 130 | files = None 131 | if "blob" in kwargs: 132 | files = {"blob": kwargs["blob"]} 133 | del kwargs["blob"] 134 | 135 | for k in cls._json_props: 136 | if k in kwargs and type(kwargs[k]) in {dict, list}: 137 | kwargs[k] = json.dumps(kwargs[k]) 138 | 139 | r = RestClient.post(cls._endpoint, payload=kwargs, files=files) 140 | 141 | return cls(r["url"]) 142 | 143 | @classmethod 144 | def get_all(cls, params=None): 145 | all_data = RestClient.get(cls._endpoint, params=params) 146 
| 147 | items = list() 148 | 149 | if type(all_data) is not list: 150 | return items 151 | 152 | for item_data in all_data: 153 | items.append(cls(item_data["url"])) 154 | 155 | return items 156 | 157 | 158 | class Module(ApiDataModel): 159 | _endpoint = RestClient.make_path(API_BASE_URL, "modules") 160 | _json_props = ["config"] 161 | 162 | 163 | class Function(ApiDataModel): 164 | _endpoint = RestClient.make_path(API_BASE_URL, "functions") 165 | _expandable = {"module": Module} 166 | _json_props = ["options", "optional_filetypes", "output_tags"] 167 | 168 | 169 | class User(ApiDataModel): 170 | _endpoint = RestClient.make_path(API_BASE_URL, "users") 171 | 172 | @classmethod 173 | def get_current(cls): 174 | try: 175 | current_user_endpoint = RestClient.make_path( 176 | API_BASE_URL.replace("/api/v1", ""), "auth", "user" 177 | ) 178 | current_user_data = RestClient.get(current_user_endpoint) 179 | current_user_url = current_user_data["url"] 180 | return cls(current_user_url) 181 | except: 182 | return None 183 | 184 | 185 | class File(ApiDataModel): 186 | _endpoint = RestClient.make_path(API_BASE_URL, "files") 187 | _expandable = {"owner": User} 188 | _json_props = ["tags"] 189 | 190 | 191 | class Task(ApiDataModel): 192 | _endpoint = RestClient.make_path(API_BASE_URL, "tasks") 193 | _expandable = {"owner": User, "function": Function} 194 | _json_props = ["arguments"] 195 | 196 | 197 | class Run(ApiDataModel): 198 | _endpoint = RestClient.make_path(API_BASE_URL, "runs") 199 | _expandable = {"owner": User, "files": [File]} 200 | 201 | 202 | class Job(ApiDataModel): 203 | _endpoint = RestClient.make_path(API_BASE_URL, "jobs") 204 | _expandable = {"owner": User, "task": Task, "run": Run, "output_files": [File]} 205 | _json_props = ["output"] 206 | 207 | @property 208 | def parent_job(self): 209 | parent_job_url = self.__getattr__("parent_job") 210 | if parent_job_url is None: 211 | return None 212 | 213 | return Job(parent_job_url) 214 | 215 | @classmethod 216 | def get_all_actionable(cls): 217 | all_pending_jobs = cls.get_all(params={"status": "PENDING"}) 218 | 219 | return list( 220 | filter( 221 | lambda j: j.parent_job is None or j.parent_job.status == "FINISHED", 222 | all_pending_jobs, 223 | ) 224 | ) 225 | -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | SCRATCH_DIR = os.path.join(BASE_DIR, ".scratch") 6 | BASE_DIR_DOCKER = os.getenv("MLSPLOIT_DOCKER_HOST_BASE_DIR") or BASE_DIR 7 | SCRATCH_DIR_DOCKER = os.path.join(BASE_DIR_DOCKER, ".scratch") 8 | os.makedirs(SCRATCH_DIR, exist_ok=True) 9 | 10 | APP_NAME = "mlsploit" 11 | 12 | BROKER_URL = os.getenv("MLSPLOIT_BROKER_URL") 13 | assert BROKER_URL, """ 14 | MLSPLOIT_BROKER_URL environment variable not found. 15 | $ export MLSPLOIT_BROKER_URL='amqp://admin:password@localhost:5672/mlsploit' 16 | """ 17 | 18 | BACKEND_URL = os.getenv("MLSPLOIT_BACKEND_URL") 19 | assert BACKEND_URL, """ 20 | MLSPLOIT_BACKEND_URL environment variable not found. 21 | $ export MLSPLOIT_BACKEND_URL='redis://localhost:6379' 22 | """ 23 | 24 | API_ADMIN_TOKEN = os.getenv("MLSPLOIT_API_ADMIN_TOKEN") 25 | assert API_ADMIN_TOKEN, """ 26 | MLSPLOIT_API_ADMIN_TOKEN environment variable not found. 
27 | $ export MLSPLOIT_API_ADMIN_TOKEN='dd6f003f47b68e3fcd24fe5b3cade72168557d9f' 28 | """ 29 | 30 | EXECUTION_MODE = os.getenv("CONTAINER_BUILD_STAGE") 31 | 32 | BUILD_MODULES = os.getenv("MLSPLOIT_MODULES") 33 | BUILD_MODULES = list(filter(len, (BUILD_MODULES or "").split(","))) 34 | -------------------------------------------------------------------------------- /src/mlsploit.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import tempfile 7 | import time 8 | from urllib.request import urlretrieve 9 | 10 | from celery import Celery 11 | from celery.signals import celeryd_after_setup 12 | import docker 13 | import git 14 | 15 | from api import File, Job, Module, RestClient, User 16 | from constants import * 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | logger.setLevel(logging.DEBUG) 21 | 22 | app = Celery(APP_NAME, broker=BROKER_URL, backend=BACKEND_URL) 23 | app.conf.update(worker_prefetch_multiplier=1, worker_send_task_events=True) 24 | app.conf.beat_schedule = { 25 | "fetch-jobs-every-10-seconds": { 26 | "task": "mlsploit.fetch_actionable_jobs", 27 | "options": {"queue": "housekeeping"}, 28 | "schedule": 10.0, 29 | } 30 | } 31 | 32 | RestClient.set_token(API_ADMIN_TOKEN) 33 | 34 | 35 | def check_master_online(): 36 | master_online = False 37 | workers = app.control.inspect().active() or dict() 38 | for worker_name in workers.keys(): 39 | master_online = master_online or worker_name.startswith("mlsploit.master") 40 | return master_online 41 | 42 | 43 | def setup_docker_images(): 44 | logger.info("Fetching all modules") 45 | modules, num_built = Module.get_all(), 0 46 | logger.debug(modules) 47 | 48 | logger.info("Initializing docker client") 49 | client = docker.APIClient(base_url="unix://var/run/docker.sock") 50 | logger.debug(client) 51 | 52 | for module in modules: 53 | name = module.name 54 | 55 | if "*" in BUILD_MODULES or name in BUILD_MODULES: 56 | logger.info("-" * 20) 57 | 58 | repo_url = module.repo_url 59 | repo_branch = module.repo_branch 60 | repo_dir = tempfile.mkdtemp() 61 | 62 | logger.debug(f"{name}: repo_url = {repo_url}") 63 | logger.debug(f"{name}: repo_branch = {repo_branch}") 64 | logger.debug(f"{name}: repo_dir = {repo_dir}") 65 | 66 | try: 67 | git.Repo.clone_from( 68 | repo_url, repo_dir, branch=repo_branch, recursive=True 69 | ) 70 | 71 | logger.info( 72 | f"{name}: Finished cloning repository, starting docker build" 73 | ) 74 | 75 | build_stream = client.build(path=repo_dir, tag=name, decode=True) 76 | for chunk in build_stream: 77 | if "stream" in chunk: 78 | for line in chunk["stream"].splitlines(): 79 | logger.debug(f"{name}: {line}") 80 | 81 | num_built += 1 82 | 83 | except Exception as e: 84 | logger.error(f"{name}: Failed to build docker image", exc_info=True) 85 | 86 | shutil.rmtree(repo_dir) 87 | logger.info("-" * 20) 88 | 89 | logger.info(f"Built docker images for {num_built} modules") 90 | 91 | 92 | @celeryd_after_setup.connect 93 | def startup(sender, instance, **kwargs): 94 | logger.debug(f"EXECUTION_MODE = {EXECUTION_MODE}") 95 | logger.debug(f"SCRATCH_DIR = {SCRATCH_DIR}") 96 | logger.debug(f"SCRATCH_DIR_DOCKER = {SCRATCH_DIR_DOCKER}") 97 | 98 | if EXECUTION_MODE == "master": 99 | wait = 5 100 | logger.info(f"Waiting {wait}s for other services to spin up") 101 | time.sleep(wait) 102 | 103 | else: 104 | logger.debug(f"BUILD_MODULES = {BUILD_MODULES}") 105 | 106 | master_online = check_master_online() 107 | while 
not master_online: 108 | wait = 5 # `check_master_online` already blocks on networking services 109 | logger.info(f"Waiting {wait}s for mlsploit.master to come online") 110 | time.sleep(wait) 111 | 112 | master_online = check_master_online() 113 | logger.info("Detected mlsploit.master is online!") 114 | 115 | if len(BUILD_MODULES) > 0: 116 | try: 117 | logger.info("Setting up modules") 118 | setup_docker_images() 119 | except Exception as e: 120 | logger.error("Setting up MLsploit modules failed", exc_info=True) 121 | else: 122 | logger.info("No modules to build!") 123 | 124 | 125 | @app.task 126 | def fetch_actionable_jobs(): 127 | jobs = Job.get_all_actionable() 128 | 129 | for job in jobs: 130 | job_module = job.task.function.module.name 131 | job.status = "QUEUED" 132 | promise = perform_job.s(job.id) 133 | promise.apply_async(queue=job_module) 134 | 135 | return [job.url for job in jobs] 136 | 137 | 138 | @app.task(bind=True) 139 | def perform_job(self, job_id): 140 | job_logs = str() 141 | job = Job.from_id(job_id) 142 | job.status = "RUNNING" 143 | 144 | def log(logtxt, lvl=logging.INFO): 145 | nonlocal job_logs 146 | 147 | logtxt = str(logtxt) 148 | logtxt = logtxt.strip(" \n") 149 | 150 | job_logs += logtxt + "\n" 151 | job.logs = job_logs 152 | if lvl == logging.ERROR: 153 | logger.error(f"{job_id}: {logtxt}", exc_info=True) 154 | else: 155 | logger.log(lvl, f"{job_id}: {logtxt}") 156 | 157 | log(("-" * 20) + "\n") 158 | 159 | try: 160 | output_json, output_file_names = dict(), list() 161 | 162 | # Get all data from API at once since it is time-cached 163 | current_user = User.get_current() 164 | current_user_url = current_user.url if current_user is not None else None 165 | job_url = job.url 166 | module = job.task.function.module 167 | module_name = module.name 168 | function_name = job.task.function.name 169 | arguments = job.task.arguments 170 | owner_url = job.owner.url 171 | parent_job = job.parent_job 172 | input_files = job.run.files if parent_job is None else parent_job.output_files 173 | input_file_names = [f.name for f in input_files] 174 | input_file_tags = {f.name: f.tags for f in input_files} 175 | input_file_urls = {f.name: f.url for f in input_files} 176 | input_file_blob_urls = {f.name: f.blob_url for f in input_files} 177 | 178 | log(f"Running function '{function_name}' from module '{module_name}'") 179 | log("Arguments = " + str(arguments), logging.DEBUG) 180 | log("Input Files = " + ", ".join(input_file_names), logging.DEBUG) 181 | 182 | # Create job folder with input and output directories 183 | job_dir = os.path.join(SCRATCH_DIR, "jobs", str(job_id)) 184 | input_dir = os.path.join(job_dir, "input") 185 | output_dir = os.path.join(job_dir, "output") 186 | job_dir_docker = os.path.join(SCRATCH_DIR_DOCKER, "jobs", str(job_id)) 187 | input_dir_docker = os.path.join(job_dir_docker, "input") 188 | output_dir_docker = os.path.join(job_dir_docker, "output") 189 | 190 | original_umask = os.umask(0) 191 | os.makedirs(job_dir, exist_ok=True) 192 | os.makedirs(input_dir, exist_ok=True) 193 | os.makedirs(output_dir, exist_ok=True) 194 | os.umask(original_umask) 195 | 196 | # Download input files 197 | for name, url in input_file_blob_urls.items(): 198 | urlretrieve(url, os.path.join(input_dir, name)) 199 | 200 | # Create input JSON file 201 | input_json_dict = { 202 | "name": function_name, 203 | "num_files": len(input_file_names), 204 | "files": input_file_names, 205 | "options": arguments, 206 | "tags": [input_file_tags[name] for name in input_file_names], 207 | } 208 
| input_json_filepath = os.path.join(input_dir, "input.json") 209 | with open(input_json_filepath, "w") as f: 210 | json.dump(input_json_dict, f) 211 | 212 | # Run docker image 213 | client = docker.from_env() 214 | 215 | log("Starting job...") 216 | container = client.containers.run( 217 | "%s:latest" % module_name, 218 | detach=True, 219 | stdout=True, 220 | stderr=True, 221 | volumes={ 222 | input_dir_docker: {"bind": "/mnt/input", "mode": "rw"}, 223 | output_dir_docker: {"bind": "/mnt/output", "mode": "rw"}, 224 | }, 225 | environment=["PYTHONUNBUFFERED=1"], 226 | ) 227 | 228 | for line in container.logs(stream=True): 229 | log("[JOB] " + line.decode("utf-8"), logging.DEBUG) 230 | 231 | container_exit_status = container.wait() 232 | container_exit_status = container_exit_status["StatusCode"] 233 | container.remove() 234 | 235 | assert ( 236 | container_exit_status == 0 237 | ), f"Module container exited with status code {container_exit_status}" 238 | 239 | # Update output for job 240 | output_json_filepath = os.path.join(output_dir, "output.json") 241 | with open(output_json_filepath, "r") as f: 242 | output_json = json.load(f) 243 | job.output = output_json 244 | 245 | # Upload output files 246 | output_file_names = output_json["files"] 247 | output_file_tags = output_json["tags"] 248 | output_filepaths = [os.path.join(output_dir, f) for f in output_file_names] 249 | assert all(os.path.exists(fp) for fp in output_filepaths) 250 | output_file_urls = list() 251 | for name, tags, path in zip( 252 | output_file_names, output_file_tags, output_filepaths 253 | ): 254 | 255 | f = None 256 | file_kwargs = {"kind": "OUTPUT", "tags": tags, "blob": open(path, "rb")} 257 | if current_user_url != owner_url: 258 | file_kwargs["owner"] = owner_url 259 | 260 | if name in output_json["files_modified"]: 261 | file_kwargs["parent_file"] = input_file_urls[name] 262 | f = File.create(**file_kwargs) 263 | 264 | elif name in output_json["files_created"]: 265 | f = File.create(**file_kwargs) 266 | 267 | elif name in input_file_names: 268 | file_url = input_file_urls[name] 269 | file_tags = input_file_tags[name] 270 | file_tags.update(tags) 271 | f = File(file_url) 272 | f.tags = file_tags 273 | 274 | if f is not None: 275 | output_file_urls.append(f.url) 276 | 277 | job.output_files = output_file_urls 278 | job.status = "FINISHED" 279 | log("Finished job") 280 | 281 | except Exception as e: 282 | job.status = "FAILED" 283 | log("Job failed!!!", logging.ERROR) 284 | 285 | finally: 286 | # Update logs 287 | job.logs = job_logs 288 | 289 | # Cleanup 290 | shutil.rmtree(job_dir) 291 | 292 | return job_url, output_json, output_file_names 293 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlsploit/mlsploit-execution-backend/f3e32985fad28196466ea7d059e94c27cb748649/src/utils/__init__.py -------------------------------------------------------------------------------- /wait-for-rabbitmq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import time 6 | 7 | import pika 8 | 9 | 10 | MLSPLOIT_BROKER_URL = os.getenv("MLSPLOIT_BROKER_URL") 11 | parameters = pika.URLParameters(MLSPLOIT_BROKER_URL) 12 | 13 | wait = 5 14 | start = time.time() 15 | while True: 16 | print(f"Waiting {wait}s for RabbitMQ to come up...") 17 | time.sleep(wait) 18 | 19 | try: 20 | 
connection = pika.BlockingConnection(parameters)
    except pika.exceptions.AMQPConnectionError:
        pass
    else:
        # A successful connection means the broker is ready; the original
        # `break` inside the try block skipped this else clause, so the
        # success message below was never printed.
        connection.close()
        end = time.time()
        print(f"RabbitMQ came up in ~{round(end - start)}s")
        break
--------------------------------------------------------------------------------
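For reference, here is the shape of the JSON contract that `perform_job` in
`src/mlsploit.py` exchanges with module containers: the orchestrator writes
`input.json` into the job's input directory (mounted at `/mnt/input`) and
reads `output.json` from the output directory (mounted at `/mnt/output`). The
sketch below mirrors the field names used in the source; all concrete values
are illustrative only.

```python
# Illustrative sketch of the job I/O contract implemented by perform_job
# (src/mlsploit.py); the keys are taken from the source, the values are
# made-up examples.

# Written by the orchestrator to /mnt/input/input.json:
input_json = {
    "name": "attack",                 # job.task.function.name
    "num_files": 1,                   # number of input files
    "files": ["image.png"],           # file names placed in /mnt/input
    "options": {"epsilon": 0.1},      # job.task.arguments
    "tags": [{"label": "cat"}],       # per-file tags, same order as "files"
}

# Expected from the module container at /mnt/output/output.json:
output_json = {
    "files": ["image.png"],           # file names left in /mnt/output
    "tags": [{"label": "dog"}],       # per-file tags for the outputs
    "files_modified": ["image.png"],  # outputs derived from an input file
    "files_created": [],              # brand-new output files
}
```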