├── .dockerignore
├── .env.example
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── docker-compose.execution.yml
├── docker-compose.networking.yml
├── docker-services
│   ├── flower
│   │   ├── Dockerfile
│   │   └── requirements.txt
│   └── rabbitmq
│       ├── Dockerfile
│       ├── config_rabbit.sh
│       ├── enabled_plugins
│       ├── init.sh
│       └── rabbitmq.config
├── docker-setup-execution.sh
├── docker-start-master.sh
├── docker-start-worker.sh
├── entrypoint-master.sh
├── entrypoint-worker.sh
├── env-set-token.sh
├── get-module-names.sh
├── modules.csv.example
├── requirements.txt
├── src
│   ├── api.py
│   ├── constants.py
│   ├── mlsploit.py
│   └── utils
│       └── __init__.py
└── wait-for-rabbitmq.py
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
.venv
Pipfile
Pipfile.lock

*__pycache__
*.pyc

.scratch
docker-services

celerybeat-schedule
celerybeat-schedule.db
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
MLSPLOIT_API_ADMIN_TOKEN=

MLSPLOIT_API_BASE_URL=http://api-host/api/v1
MLSPLOIT_BACKEND_URL=redis://redis-host:6379
MLSPLOIT_BROKER_URL=amqp://admin:password@rabbitmq-host:5672/mlsploit

MLSPLOIT_EXECUTION_JOB_CONCURRENCY=5
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store

*.pyc
*__pycache__
.mypy_cache/

.idea/

# Local scratch directory
.scratch

# Environments
.env
.venv
Pipfile
Pipfile.lock

celerybeat-schedule
celerybeat-schedule.db

# Modules file
/modules.csv
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM docker:stable

VOLUME /app

RUN apk upgrade --no-cache \
    && apk add --no-cache --update git python3 py-pip

ENV PYTHONUNBUFFERED=1

COPY requirements.txt /
RUN pip3 install --upgrade pip \
    && pip3 install -r /requirements.txt \
    && rm -rf /root/.cache/pip/wheels/*

ARG STAGE
ARG BASE_DIR
ENV CONTAINER_BUILD_STAGE=$STAGE
ENV MLSPLOIT_DOCKER_HOST_BASE_DIR=$BASE_DIR

WORKDIR /app

ENTRYPOINT sh entrypoint-${CONTAINER_BUILD_STAGE}.sh
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2019, Nilaksh Das
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Execution Orchestrator for MLsploit

## ENVIRONMENT SETUP

The primary configuration settings of the execution module are stored
in the `.env` file, which the setup scripts create from `.env.example`
(you can also copy it manually). Modify it according to your needs.

The first thing you should update is `MLSPLOIT_API_ADMIN_TOKEN`,
which is required for accessing the MLsploit data API
(the `env-set-token.sh` helper script can set it for you).

## RUN WITH DOCKER

We use `docker-compose` to orchestrate the setup and execution of the service.
You only need to set up `docker` and `docker-compose` on your system
and then run the following command:

```bash
$ bash docker-start-master.sh
```

This starts the execution service in *MASTER* mode, which runs the
scheduling, networking, monitoring and execution of jobs.
The *WORKER* mode, which only handles job execution,
can be run in parallel on another system using the following command:

```bash
$ bash docker-start-worker.sh
```

For the *WORKER* mode, you will need to update the host environment variables
in the `.env` file (`MLSPLOIT_API_BASE_URL`, `MLSPLOIT_BACKEND_URL` and
`MLSPLOIT_BROKER_URL`) so that they point to the *MASTER* node.

## MANUAL SETUP

### Install the dependencies

```bash
$ pip install -r requirements.txt
```


### Set up the environment

```bash
$ export MLSPLOIT_BROKER_URL='amqp://admin:password@localhost:5672/mlsploit'
$ export MLSPLOIT_BACKEND_URL='redis://localhost:6379'
$ export MLSPLOIT_API_ADMIN_TOKEN=
```

## USAGE

```bash
$ celery worker -A mlsploit -B \
    -l info \
    -Ofair \
    -c 5
```

The `-B` flag starts the beat scheduler that fetches and queues pending jobs,
so it should be enabled on only one worker node.
--------------------------------------------------------------------------------
/docker-compose.execution.yml:
--------------------------------------------------------------------------------
version: '3'

services:
  mlsploit-execution-service:
    build:
      context: .
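      # Note: BASE_DIR captures the host path of this checkout ($PWD when the
      # docker-start scripts invoke docker-compose). The service mounts
      # /var/run/docker.sock and launches module containers as siblings on
      # the host daemon, so the bind-mount paths it passes to Docker (see
      # SCRATCH_DIR_DOCKER in src/constants.py) must be host paths rather
      # than paths inside this container.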
7 | args: 8 | - BASE_DIR=$PWD 9 | network_mode: host 10 | extra_hosts: 11 | - 'api-host:127.0.0.1' 12 | - 'redis-host:127.0.0.1' 13 | - 'rabbitmq-host:127.0.0.1' 14 | volumes: 15 | - /var/run/docker.sock:/var/run/docker.sock 16 | - .:/app 17 | env_file: 18 | - .env 19 | environment: 20 | RUNNING_IN_DOCKER: 'true' 21 | -------------------------------------------------------------------------------- /docker-compose.networking.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | mlsploit-networking-backend-service: 5 | image: redis:latest 6 | ports: 7 | - 6379:6379 8 | 9 | mlsploit-networking-broker-service: 10 | build: ./docker-services/rabbitmq 11 | ports: 12 | - 5672:5672 13 | - 15672:15672 14 | -------------------------------------------------------------------------------- /docker-services/flower/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | 3 | COPY requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | ENTRYPOINT ["flower"] 7 | CMD ["--port=5555", \ 8 | "--broker=amqp://admin:password@rabbitmq-service:5672/", \ 9 | "--broker_api=http://admin:password@rabbitmq-service:15672/api/"] 10 | -------------------------------------------------------------------------------- /docker-services/flower/requirements.txt: -------------------------------------------------------------------------------- 1 | flower==0.9.3 2 | tornado==6.0.4 3 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rabbitmq:latest 2 | 3 | EXPOSE 5672 4 | EXPOSE 15672 5 | 6 | COPY enabled_plugins /etc/rabbitmq/ 7 | COPY rabbitmq.config /etc/rabbitmq/ 8 | 9 | COPY init.sh / 10 | COPY config_rabbit.sh / 11 | 12 | RUN chmod 777 /init.sh /config_rabbit.sh 13 | 14 | CMD ["/init.sh"] 15 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/config_rabbit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script needs to be executed just once 4 | if [ -f /$0.completed ] ; then 5 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] /$0.completed found, skipping run" 6 | exit 0 7 | fi 8 | 9 | # Wait for RabbitMQ startup 10 | for (( ; ; )) ; do 11 | sleep 2 12 | rabbitmqctl -q node_health_check > /dev/null 2>&1 13 | if [ $? -eq 0 ] ; then 14 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] rabbitmq is now running" 15 | break 16 | else 17 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] waiting for rabbitmq startup" 18 | fi 19 | done 20 | 21 | # Execute RabbitMQ config commands here 22 | 23 | # Add vhosts 24 | rabbitmqctl add_vhost mlsploit 25 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] vhosts created" 26 | 27 | # Set permissions for vhosts 28 | rabbitmqctl set_permissions -p mlsploit admin ".*" ".*" ".*" 29 | echo "`date '+%Y-%m-%d %H:%M:%S'`.000 [$0] permissions set for vhosts" 30 | 31 | # Create mark so script is not run again 32 | touch /$0.completed 33 | -------------------------------------------------------------------------------- /docker-services/rabbitmq/enabled_plugins: -------------------------------------------------------------------------------- 1 | [rabbitmq_management, rabbitmq_management_visualiser]. 
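%% Note: the rabbitmq_management plugin enabled above serves an HTTP API and
%% web UI on port 15672, which docker-compose.networking.yml exposes and the
%% bundled Flower service is configured to query. With the default
%% credentials from rabbitmq.config, a quick liveness check could look like:
%%   curl -u admin:password http://localhost:15672/api/overview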
--------------------------------------------------------------------------------
/docker-services/rabbitmq/init.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/config_rabbit.sh & rabbitmq-server "$@"
--------------------------------------------------------------------------------
/docker-services/rabbitmq/rabbitmq.config:
--------------------------------------------------------------------------------
[
  {rabbit,
   [
    %% The default "guest" user is only permitted to access the server
    %% via a loopback interface (e.g. localhost).
    %% {loopback_users, [<<"guest">>]},
    %%
    %% The following line allows access to the guest user from
    %% anywhere on the network.
    {loopback_users, ["guest"]},
    {default_vhost, "/"},
    {default_user, "admin"},
    {default_pass, "password"},
    {default_permissions, [".*", ".*", ".*"]}
   ]}
].
--------------------------------------------------------------------------------
/docker-setup-execution.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

if [[ ! -f .env ]]; then
    cp .env.example .env
fi

MASTER_DOCKER_PROJECT_NAME=mlsploit-execution-backend-master
WORKER_DOCKER_PROJECT_NAME=mlsploit-execution-backend-worker

docker-compose \
    -p ${MASTER_DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    -f docker-compose.networking.yml \
    build \
    --build-arg STAGE=master

docker-compose \
    -p ${WORKER_DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    build \
    --build-arg STAGE=worker
--------------------------------------------------------------------------------
/docker-start-master.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

DOCKER_PROJECT_NAME=mlsploit-execution-backend-master

docker-compose \
    -p ${DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    -f docker-compose.networking.yml \
    up
--------------------------------------------------------------------------------
/docker-start-worker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

DOCKER_PROJECT_NAME=mlsploit-execution-backend-worker

docker-compose \
    -p ${DOCKER_PROJECT_NAME} \
    -f docker-compose.execution.yml \
    up
--------------------------------------------------------------------------------
/entrypoint-master.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

set -a && . .env && set +a

if [ -z "${MLSPLOIT_API_ADMIN_TOKEN}" ]
then
    echo "[ERROR] MLSPLOIT_API_ADMIN_TOKEN is not set"
    exit 1
fi

./wait-for-rabbitmq.py || exit 1

cd ./src

CELERY_ID=mlsploit.master@%h

celery worker -A mlsploit -B \
    -l info \
    -Ofair \
    -n ${CELERY_ID} \
    -Q housekeeping \
    -c ${MLSPLOIT_EXECUTION_JOB_CONCURRENCY}
--------------------------------------------------------------------------------
/entrypoint-worker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

set -a && . .env && set +a

if [ -z "${MLSPLOIT_API_ADMIN_TOKEN}" ]
then
    echo "[ERROR] MLSPLOIT_API_ADMIN_TOKEN is not set"
    exit 1
fi

export MLSPLOIT_MODULES=$(./get-module-names.sh)

./wait-for-rabbitmq.py || exit 1

cd ./src

DATE_STR=$(date +%s)
RAND_STR=$(python3 -c "from coolname import generate_slug; print(generate_slug(2))")
CELERY_ID=mlsploit.worker.${DATE_STR}.${RAND_STR}@%h

celery worker -A mlsploit \
    -Ofair \
    -l info \
    -n ${CELERY_ID} \
    -Q ${MLSPLOIT_MODULES:-celery} \
    -c ${MLSPLOIT_EXECUTION_JOB_CONCURRENCY}
--------------------------------------------------------------------------------
/env-set-token.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)"

if [[ ! -f .env ]]; then
    cp .env.example .env
fi

function usage() {
    echo "usage: bash env-set-token.sh <token>"
}

function update_env() {
    KEY="$1"; VAL="$2"; LINE_NUM=$(grep -nm 1 "^${KEY}=" .env | cut -f1 -d:)

    # Append the key if it is not present yet, instead of letting sed
    # run with an empty line address and clobber every line of .env
    if [[ -z ${LINE_NUM} ]]; then
        echo "${KEY}=${VAL}" >> .env
        return
    fi

    (sed "${LINE_NUM}s/.*/${KEY}=${VAL}/" .env > .env.tmp) && mv .env.tmp .env
}

if [[ -z $1 ]]; then
    usage
    exit 1
fi

MLSPLOIT_API_ADMIN_TOKEN="$1"
update_env MLSPLOIT_API_ADMIN_TOKEN "$MLSPLOIT_API_ADMIN_TOKEN"
--------------------------------------------------------------------------------
/get-module-names.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

MODULES=""
OLD_IFS=$IFS
if [ -f modules.csv ]; then
    while IFS=, read -r NAME REPO BRANCH; do
        MODULES="$MODULES,$NAME"
    done < modules.csv
fi
IFS=$OLD_IFS
echo "${MODULES#,}"
--------------------------------------------------------------------------------
/modules.csv.example:
--------------------------------------------------------------------------------
foolbox,https://github.com/mlsploit/foolbox-mlsploit.git,master
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cachetools==4.0.0
celery==4.4.1
coolname==1.1.0
docker==4.2.0
gitpython==3.1.0
pika==1.1.0
redis==3.4.1
requests==2.23.0
tornado==6.0.4
--------------------------------------------------------------------------------
/src/api.py:
--------------------------------------------------------------------------------
import json
import os
from urllib.parse import urlparse, ParseResult

from cachetools import TTLCache
import requests


API_BASE_URL = os.getenv("MLSPLOIT_API_BASE_URL")
assert API_BASE_URL, """
MLSPLOIT_API_BASE_URL environment variable not found. 12 | $ export MLSPLOIT_API_BASE_URL='http://localhost:8000/api/v1' 13 | """ 14 | 15 | 16 | class RestClient: 17 | _token = None 18 | _cache = TTLCache(maxsize=1000, ttl=2) 19 | 20 | @classmethod 21 | def _make_auth_header(cls): 22 | headers = dict() 23 | if cls._token is not None: 24 | headers["Authorization"] = "Token %s" % cls._token 25 | return headers 26 | 27 | @classmethod 28 | def set_token(cls, token): 29 | cls._token = token 30 | 31 | @classmethod 32 | def get(cls, url, params=None, headers=None): 33 | params = params or dict() 34 | headers = headers or dict() 35 | headers.update(cls._make_auth_header()) 36 | 37 | key = (url, tuple(params.items()), tuple(headers.items())) 38 | 39 | if key not in cls._cache: 40 | r = requests.get(url, params=params, headers=headers) 41 | 42 | cls._cache[key] = r.text 43 | data = json.loads(cls._cache[key]) 44 | 45 | return data 46 | 47 | @classmethod 48 | def post(cls, url, payload, files=None, headers=None): 49 | cls._cache.clear() 50 | 51 | headers = headers or dict() 52 | headers.update(cls._make_auth_header()) 53 | 54 | url += "/" if not url.endswith("/") else "" 55 | 56 | r = requests.post(url, data=payload, files=files, headers=headers) 57 | 58 | data = json.loads(r.text) 59 | return data 60 | 61 | @classmethod 62 | def patch(cls, url, payload, files=None, headers=None): 63 | cls._cache.clear() 64 | 65 | headers = headers or dict() 66 | headers.update(cls._make_auth_header()) 67 | 68 | url += "/" if not url.endswith("/") else "" 69 | 70 | r = requests.patch(url, data=payload, files=files, headers=headers) 71 | 72 | data = json.loads(r.text) 73 | return data 74 | 75 | @staticmethod 76 | def make_path(*args): 77 | p = "/".join(map(lambda x: x.strip("/"), args)) 78 | return p 79 | 80 | 81 | class ApiDataModel(object): 82 | _endpoint = None 83 | _expandable = dict() 84 | _json_props = list() 85 | 86 | def __init__(self, url): 87 | endpoint = urlparse(self._endpoint) 88 | endpoint = ParseResult("", *endpoint[1:]).geturl() 89 | assert endpoint in url 90 | super(ApiDataModel, self).__setattr__("_url", url) 91 | 92 | def __getattr__(self, item): 93 | data = RestClient.get(self._url) 94 | 95 | val = data[item] 96 | if item in self._expandable: 97 | if type(self._expandable[item]) is list: 98 | klass = self._expandable[item][0] 99 | val = list(klass(v) for v in val) 100 | 101 | elif val is not None: 102 | klass = self._expandable[item] 103 | val = klass(val) 104 | 105 | elif item in self._json_props: 106 | val = json.loads(val) 107 | 108 | return val 109 | 110 | def __setattr__(self, key, value): 111 | if key in self._json_props: 112 | value = json.dumps(value) 113 | 114 | RestClient.patch(self.url, {key: value}) 115 | 116 | def __repr__(self): 117 | return self._url 118 | 119 | @property 120 | def url(self): 121 | return self._url 122 | 123 | @classmethod 124 | def from_id(cls, id_): 125 | url = RestClient.make_path(cls._endpoint, str(id_)) + "/" 126 | return cls(url) 127 | 128 | @classmethod 129 | def create(cls, **kwargs): 130 | files = None 131 | if "blob" in kwargs: 132 | files = {"blob": kwargs["blob"]} 133 | del kwargs["blob"] 134 | 135 | for k in cls._json_props: 136 | if k in kwargs and type(kwargs[k]) in {dict, list}: 137 | kwargs[k] = json.dumps(kwargs[k]) 138 | 139 | r = RestClient.post(cls._endpoint, payload=kwargs, files=files) 140 | 141 | return cls(r["url"]) 142 | 143 | @classmethod 144 | def get_all(cls, params=None): 145 | all_data = RestClient.get(cls._endpoint, params=params) 146 
| 147 | items = list() 148 | 149 | if type(all_data) is not list: 150 | return items 151 | 152 | for item_data in all_data: 153 | items.append(cls(item_data["url"])) 154 | 155 | return items 156 | 157 | 158 | class Module(ApiDataModel): 159 | _endpoint = RestClient.make_path(API_BASE_URL, "modules") 160 | _json_props = ["config"] 161 | 162 | 163 | class Function(ApiDataModel): 164 | _endpoint = RestClient.make_path(API_BASE_URL, "functions") 165 | _expandable = {"module": Module} 166 | _json_props = ["options", "optional_filetypes", "output_tags"] 167 | 168 | 169 | class User(ApiDataModel): 170 | _endpoint = RestClient.make_path(API_BASE_URL, "users") 171 | 172 | @classmethod 173 | def get_current(cls): 174 | try: 175 | current_user_endpoint = RestClient.make_path( 176 | API_BASE_URL.replace("/api/v1", ""), "auth", "user" 177 | ) 178 | current_user_data = RestClient.get(current_user_endpoint) 179 | current_user_url = current_user_data["url"] 180 | return cls(current_user_url) 181 | except: 182 | return None 183 | 184 | 185 | class File(ApiDataModel): 186 | _endpoint = RestClient.make_path(API_BASE_URL, "files") 187 | _expandable = {"owner": User} 188 | _json_props = ["tags"] 189 | 190 | 191 | class Task(ApiDataModel): 192 | _endpoint = RestClient.make_path(API_BASE_URL, "tasks") 193 | _expandable = {"owner": User, "function": Function} 194 | _json_props = ["arguments"] 195 | 196 | 197 | class Run(ApiDataModel): 198 | _endpoint = RestClient.make_path(API_BASE_URL, "runs") 199 | _expandable = {"owner": User, "files": [File]} 200 | 201 | 202 | class Job(ApiDataModel): 203 | _endpoint = RestClient.make_path(API_BASE_URL, "jobs") 204 | _expandable = {"owner": User, "task": Task, "run": Run, "output_files": [File]} 205 | _json_props = ["output"] 206 | 207 | @property 208 | def parent_job(self): 209 | parent_job_url = self.__getattr__("parent_job") 210 | if parent_job_url is None: 211 | return None 212 | 213 | return Job(parent_job_url) 214 | 215 | @classmethod 216 | def get_all_actionable(cls): 217 | all_pending_jobs = cls.get_all(params={"status": "PENDING"}) 218 | 219 | return list( 220 | filter( 221 | lambda j: j.parent_job is None or j.parent_job.status == "FINISHED", 222 | all_pending_jobs, 223 | ) 224 | ) 225 | -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | SCRATCH_DIR = os.path.join(BASE_DIR, ".scratch") 6 | BASE_DIR_DOCKER = os.getenv("MLSPLOIT_DOCKER_HOST_BASE_DIR") or BASE_DIR 7 | SCRATCH_DIR_DOCKER = os.path.join(BASE_DIR_DOCKER, ".scratch") 8 | os.makedirs(SCRATCH_DIR, exist_ok=True) 9 | 10 | APP_NAME = "mlsploit" 11 | 12 | BROKER_URL = os.getenv("MLSPLOIT_BROKER_URL") 13 | assert BROKER_URL, """ 14 | MLSPLOIT_BROKER_URL environment variable not found. 15 | $ export MLSPLOIT_BROKER_URL='amqp://admin:password@localhost:5672/mlsploit' 16 | """ 17 | 18 | BACKEND_URL = os.getenv("MLSPLOIT_BACKEND_URL") 19 | assert BACKEND_URL, """ 20 | MLSPLOIT_BACKEND_URL environment variable not found. 21 | $ export MLSPLOIT_BACKEND_URL='redis://localhost:6379' 22 | """ 23 | 24 | API_ADMIN_TOKEN = os.getenv("MLSPLOIT_API_ADMIN_TOKEN") 25 | assert API_ADMIN_TOKEN, """ 26 | MLSPLOIT_API_ADMIN_TOKEN environment variable not found. 
27 | $ export MLSPLOIT_API_ADMIN_TOKEN='dd6f003f47b68e3fcd24fe5b3cade72168557d9f' 28 | """ 29 | 30 | EXECUTION_MODE = os.getenv("CONTAINER_BUILD_STAGE") 31 | 32 | BUILD_MODULES = os.getenv("MLSPLOIT_MODULES") 33 | BUILD_MODULES = list(filter(len, (BUILD_MODULES or "").split(","))) 34 | -------------------------------------------------------------------------------- /src/mlsploit.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import tempfile 7 | import time 8 | from urllib.request import urlretrieve 9 | 10 | from celery import Celery 11 | from celery.signals import celeryd_after_setup 12 | import docker 13 | import git 14 | 15 | from api import File, Job, Module, RestClient, User 16 | from constants import * 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | logger.setLevel(logging.DEBUG) 21 | 22 | app = Celery(APP_NAME, broker=BROKER_URL, backend=BACKEND_URL) 23 | app.conf.update(worker_prefetch_multiplier=1, worker_send_task_events=True) 24 | app.conf.beat_schedule = { 25 | "fetch-jobs-every-10-seconds": { 26 | "task": "mlsploit.fetch_actionable_jobs", 27 | "options": {"queue": "housekeeping"}, 28 | "schedule": 10.0, 29 | } 30 | } 31 | 32 | RestClient.set_token(API_ADMIN_TOKEN) 33 | 34 | 35 | def check_master_online(): 36 | master_online = False 37 | workers = app.control.inspect().active() or dict() 38 | for worker_name in workers.keys(): 39 | master_online = master_online or worker_name.startswith("mlsploit.master") 40 | return master_online 41 | 42 | 43 | def setup_docker_images(): 44 | logger.info("Fetching all modules") 45 | modules, num_built = Module.get_all(), 0 46 | logger.debug(modules) 47 | 48 | logger.info("Initializing docker client") 49 | client = docker.APIClient(base_url="unix://var/run/docker.sock") 50 | logger.debug(client) 51 | 52 | for module in modules: 53 | name = module.name 54 | 55 | if "*" in BUILD_MODULES or name in BUILD_MODULES: 56 | logger.info("-" * 20) 57 | 58 | repo_url = module.repo_url 59 | repo_branch = module.repo_branch 60 | repo_dir = tempfile.mkdtemp() 61 | 62 | logger.debug(f"{name}: repo_url = {repo_url}") 63 | logger.debug(f"{name}: repo_branch = {repo_branch}") 64 | logger.debug(f"{name}: repo_dir = {repo_dir}") 65 | 66 | try: 67 | git.Repo.clone_from( 68 | repo_url, repo_dir, branch=repo_branch, recursive=True 69 | ) 70 | 71 | logger.info( 72 | f"{name}: Finished cloning repository, starting docker build" 73 | ) 74 | 75 | build_stream = client.build(path=repo_dir, tag=name, decode=True) 76 | for chunk in build_stream: 77 | if "stream" in chunk: 78 | for line in chunk["stream"].splitlines(): 79 | logger.debug(f"{name}: {line}") 80 | 81 | num_built += 1 82 | 83 | except Exception as e: 84 | logger.error(f"{name}: Failed to build docker image", exc_info=True) 85 | 86 | shutil.rmtree(repo_dir) 87 | logger.info("-" * 20) 88 | 89 | logger.info(f"Built docker images for {num_built} modules") 90 | 91 | 92 | @celeryd_after_setup.connect 93 | def startup(sender, instance, **kwargs): 94 | logger.debug(f"EXECUTION_MODE = {EXECUTION_MODE}") 95 | logger.debug(f"SCRATCH_DIR = {SCRATCH_DIR}") 96 | logger.debug(f"SCRATCH_DIR_DOCKER = {SCRATCH_DIR_DOCKER}") 97 | 98 | if EXECUTION_MODE == "master": 99 | wait = 5 100 | logger.info(f"Waiting {wait}s for other services to spin up") 101 | time.sleep(wait) 102 | 103 | else: 104 | logger.debug(f"BUILD_MODULES = {BUILD_MODULES}") 105 | 106 | master_online = check_master_online() 107 | while 
not master_online: 108 | wait = 5 # `check_master_online` already blocks on networking services 109 | logger.info(f"Waiting {wait}s for mlsploit.master to come online") 110 | time.sleep(wait) 111 | 112 | master_online = check_master_online() 113 | logger.info("Detected mlsploit.master is online!") 114 | 115 | if len(BUILD_MODULES) > 0: 116 | try: 117 | logger.info("Setting up modules") 118 | setup_docker_images() 119 | except Exception as e: 120 | logger.error("Setting up MLsploit modules failed", exc_info=True) 121 | else: 122 | logger.info("No modules to build!") 123 | 124 | 125 | @app.task 126 | def fetch_actionable_jobs(): 127 | jobs = Job.get_all_actionable() 128 | 129 | for job in jobs: 130 | job_module = job.task.function.module.name 131 | job.status = "QUEUED" 132 | promise = perform_job.s(job.id) 133 | promise.apply_async(queue=job_module) 134 | 135 | return [job.url for job in jobs] 136 | 137 | 138 | @app.task(bind=True) 139 | def perform_job(self, job_id): 140 | job_logs = str() 141 | job = Job.from_id(job_id) 142 | job.status = "RUNNING" 143 | 144 | def log(logtxt, lvl=logging.INFO): 145 | nonlocal job_logs 146 | 147 | logtxt = str(logtxt) 148 | logtxt = logtxt.strip(" \n") 149 | 150 | job_logs += logtxt + "\n" 151 | job.logs = job_logs 152 | if lvl == logging.ERROR: 153 | logger.error(f"{job_id}: {logtxt}", exc_info=True) 154 | else: 155 | logger.log(lvl, f"{job_id}: {logtxt}") 156 | 157 | log(("-" * 20) + "\n") 158 | 159 | try: 160 | output_json, output_file_names = dict(), list() 161 | 162 | # Get all data from API at once since it is time-cached 163 | current_user = User.get_current() 164 | current_user_url = current_user.url if current_user is not None else None 165 | job_url = job.url 166 | module = job.task.function.module 167 | module_name = module.name 168 | function_name = job.task.function.name 169 | arguments = job.task.arguments 170 | owner_url = job.owner.url 171 | parent_job = job.parent_job 172 | input_files = job.run.files if parent_job is None else parent_job.output_files 173 | input_file_names = [f.name for f in input_files] 174 | input_file_tags = {f.name: f.tags for f in input_files} 175 | input_file_urls = {f.name: f.url for f in input_files} 176 | input_file_blob_urls = {f.name: f.blob_url for f in input_files} 177 | 178 | log(f"Running function '{function_name}' from module '{module_name}'") 179 | log("Arguments = " + str(arguments), logging.DEBUG) 180 | log("Input Files = " + ", ".join(input_file_names), logging.DEBUG) 181 | 182 | # Create job folder with input and output directories 183 | job_dir = os.path.join(SCRATCH_DIR, "jobs", str(job_id)) 184 | input_dir = os.path.join(job_dir, "input") 185 | output_dir = os.path.join(job_dir, "output") 186 | job_dir_docker = os.path.join(SCRATCH_DIR_DOCKER, "jobs", str(job_id)) 187 | input_dir_docker = os.path.join(job_dir_docker, "input") 188 | output_dir_docker = os.path.join(job_dir_docker, "output") 189 | 190 | original_umask = os.umask(0) 191 | os.makedirs(job_dir, exist_ok=True) 192 | os.makedirs(input_dir, exist_ok=True) 193 | os.makedirs(output_dir, exist_ok=True) 194 | os.umask(original_umask) 195 | 196 | # Download input files 197 | for name, url in input_file_blob_urls.items(): 198 | urlretrieve(url, os.path.join(input_dir, name)) 199 | 200 | # Create input JSON file 201 | input_json_dict = { 202 | "name": function_name, 203 | "num_files": len(input_file_names), 204 | "files": input_file_names, 205 | "options": arguments, 206 | "tags": [input_file_tags[name] for name in input_file_names], 207 | } 208 
| input_json_filepath = os.path.join(input_dir, "input.json") 209 | with open(input_json_filepath, "w") as f: 210 | json.dump(input_json_dict, f) 211 | 212 | # Run docker image 213 | client = docker.from_env() 214 | 215 | log("Starting job...") 216 | container = client.containers.run( 217 | "%s:latest" % module_name, 218 | detach=True, 219 | stdout=True, 220 | stderr=True, 221 | volumes={ 222 | input_dir_docker: {"bind": "/mnt/input", "mode": "rw"}, 223 | output_dir_docker: {"bind": "/mnt/output", "mode": "rw"}, 224 | }, 225 | environment=["PYTHONUNBUFFERED=1"], 226 | ) 227 | 228 | for line in container.logs(stream=True): 229 | log("[JOB] " + line.decode("utf-8"), logging.DEBUG) 230 | 231 | container_exit_status = container.wait() 232 | container_exit_status = container_exit_status["StatusCode"] 233 | container.remove() 234 | 235 | assert ( 236 | container_exit_status == 0 237 | ), f"Module container exited with status code {container_exit_status}" 238 | 239 | # Update output for job 240 | output_json_filepath = os.path.join(output_dir, "output.json") 241 | with open(output_json_filepath, "r") as f: 242 | output_json = json.load(f) 243 | job.output = output_json 244 | 245 | # Upload output files 246 | output_file_names = output_json["files"] 247 | output_file_tags = output_json["tags"] 248 | output_filepaths = [os.path.join(output_dir, f) for f in output_file_names] 249 | assert all(os.path.exists(fp) for fp in output_filepaths) 250 | output_file_urls = list() 251 | for name, tags, path in zip( 252 | output_file_names, output_file_tags, output_filepaths 253 | ): 254 | 255 | f = None 256 | file_kwargs = {"kind": "OUTPUT", "tags": tags, "blob": open(path, "rb")} 257 | if current_user_url != owner_url: 258 | file_kwargs["owner"] = owner_url 259 | 260 | if name in output_json["files_modified"]: 261 | file_kwargs["parent_file"] = input_file_urls[name] 262 | f = File.create(**file_kwargs) 263 | 264 | elif name in output_json["files_created"]: 265 | f = File.create(**file_kwargs) 266 | 267 | elif name in input_file_names: 268 | file_url = input_file_urls[name] 269 | file_tags = input_file_tags[name] 270 | file_tags.update(tags) 271 | f = File(file_url) 272 | f.tags = file_tags 273 | 274 | if f is not None: 275 | output_file_urls.append(f.url) 276 | 277 | job.output_files = output_file_urls 278 | job.status = "FINISHED" 279 | log("Finished job") 280 | 281 | except Exception as e: 282 | job.status = "FAILED" 283 | log("Job failed!!!", logging.ERROR) 284 | 285 | finally: 286 | # Update logs 287 | job.logs = job_logs 288 | 289 | # Cleanup 290 | shutil.rmtree(job_dir) 291 | 292 | return job_url, output_json, output_file_names 293 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlsploit/mlsploit-execution-backend/f3e32985fad28196466ea7d059e94c27cb748649/src/utils/__init__.py -------------------------------------------------------------------------------- /wait-for-rabbitmq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import time 6 | 7 | import pika 8 | 9 | 10 | MLSPLOIT_BROKER_URL = os.getenv("MLSPLOIT_BROKER_URL") 11 | parameters = pika.URLParameters(MLSPLOIT_BROKER_URL) 12 | 13 | wait = 5 14 | start = time.time() 15 | while True: 16 | print(f"Waiting {wait}s for RabbitMQ to come up...") 17 | time.sleep(wait) 18 | 19 | try: 20 | 
connection = pika.BlockingConnection(parameters)
    except pika.exceptions.AMQPConnectionError:
        pass
    else:
        # A successful connection means the broker is ready; the original
        # `break` inside the try block skipped this else clause, so the
        # success message below was never printed.
        connection.close()
        end = time.time()
        print(f"RabbitMQ came up in ~{round(end - start)}s")
        break
--------------------------------------------------------------------------------
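For reference, here is the shape of the JSON contract that `perform_job` in
`src/mlsploit.py` exchanges with module containers: the orchestrator writes
`input.json` into the job's input directory (mounted at `/mnt/input`) and
reads `output.json` from the output directory (mounted at `/mnt/output`). The
sketch below mirrors the field names used in the source; all concrete values
are illustrative only.

```python
# Illustrative sketch of the job I/O contract implemented by perform_job
# (src/mlsploit.py); the keys are taken from the source, the values are
# made-up examples.

# Written by the orchestrator to /mnt/input/input.json:
input_json = {
    "name": "attack",                 # job.task.function.name
    "num_files": 1,                   # number of input files
    "files": ["image.png"],           # file names placed in /mnt/input
    "options": {"epsilon": 0.1},      # job.task.arguments
    "tags": [{"label": "cat"}],       # per-file tags, same order as "files"
}

# Expected from the module container at /mnt/output/output.json:
output_json = {
    "files": ["image.png"],           # file names left in /mnt/output
    "tags": [{"label": "dog"}],       # per-file tags for the outputs
    "files_modified": ["image.png"],  # outputs derived from an input file
    "files_created": [],              # brand-new output files
}
```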