├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── bin ├── backend.py ├── env.py ├── model.py ├── server.py ├── visualizer.py └── workflow.py ├── cli ├── cancel.sh ├── create.sh ├── delete.sh ├── download.sh ├── get.sh ├── launch.sh ├── log.sh ├── query.sh └── upload.sh ├── client ├── app.js ├── css │ └── style.css ├── favicon.ico ├── index.html └── views │ ├── model.html │ ├── task.html │ ├── tasks.html │ ├── visualizer.html │ ├── workflow.html │ └── workflows.html ├── helm ├── Chart.yaml ├── README.md ├── gen-secret.sh ├── templates │ ├── _helpers.tpl │ ├── nextflow-api.yaml │ └── pvc.yaml └── values.yaml ├── requirements.txt └── scripts ├── convert-json-pkl.py ├── db-backup.sh ├── db-restore.sh ├── db-startup.sh ├── deploy.sh ├── kube-cancel.sh ├── kube-config.sh ├── kube-run.sh ├── kube-save.sh ├── startup-local.sh ├── startup-nautilus.sh └── startup-palmetto.sh /.gitignore: -------------------------------------------------------------------------------- 1 | _models 2 | _trace 3 | _workflows 4 | 5 | helm/.helmignore 6 | helm/.nextflow.log 7 | helm/.sops.yaml 8 | helm/templates/secret.yaml 9 | 10 | .ipynb_checkpoints 11 | *.json 12 | .nextflow* 13 | *.pkl 14 | *.pyc 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV NXF_VER="21.04.3" 5 | EXPOSE 8080 6 | EXPOSE 27017 7 | 8 | # install package dependencies 9 | RUN apt-get update -qq \ 10 | && apt-get install -qq -y \ 11 | apt-transport-https \ 12 | apt-utils \ 13 | ca-certificates \ 14 | cron \ 15 | curl \ 16 | git \ 17 | mongodb \ 18 | openjdk-8-jre \ 19 | python3.7 \ 20 | python3-pip \ 21 | zip 22 | 23 | # change python to refer to python 3.7 24 | RUN rm /usr/bin/python3 && ln -s python3.7 /usr/bin/python3 25 | 26 | # install kubectl 27 | RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ 28 | && echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list \ 29 | && apt-get update -qq \ 30 | && apt-get install -qq -y kubectl 31 | 32 | # install nextflow 33 | RUN curl -s https://get.nextflow.io | bash \ 34 | && mv nextflow /usr/local/bin \ 35 | && nextflow info 36 | 37 | # install nextflow-api from build context 38 | WORKDIR /opt/nextflow-api 39 | 40 | COPY . . 41 | 42 | # install python dependencies 43 | RUN python3 -m pip install --upgrade pip 44 | RUN python3 -m pip install -r requirements.txt 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Fan Jiang, Cole McKnight, Benjamin Shealy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nextflow-API 2 | 3 | Nextflow-API is a web application and REST API for submitting and monitoring Nextflow pipelines on a variety of execution environments. The REST API is implemented in Python using the [Tornado](https://www.tornadoweb.org/en/stable/) framework, and the client-side application is implemented using [AngularJS](https://angularjs.org/). Nextflow-API can be deployed locally or to a Kubernetes cluster. There is also experimental support for PBS, and Nextflow-API can be extended to other Nextflow-supported executors upon request. 4 | 5 | ## Deployment 6 | 7 | ### Local 8 | 9 | Install the dependencies as shown in the [Dockerfile](Dockerfile). Depending on your setup, you may not need to install `mongodb` or `kubectl`. You may also prefer to install the Python dependencies in an Anaconda environment: 10 | ```bash 11 | conda create -n nextflow-api python=3.7 12 | conda activate nextflow-api 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | Use `scripts/startup-local.sh` to deploy Nextflow-API locally; you may need to modify the script to fit your environment. 17 | 18 | ### Palmetto 19 | 20 | To use Nextflow-API on the Palmetto cluster, you will need to provision a Login VM, install the Python dependencies in an Anaconda environment, and either request a MongoDB allocation or use the `file` backend. Use `scripts/startup-palmetto.sh` to deploy Nextflow-API; you may need to modify the script to fit your environment. The web interface is only accessible from the campus network or the Clemson VPN. For long-running deployments, run the script within a `screen` session on your Login VM. 21 | 22 | ### Kubernetes 23 | 24 | Refer to the [helm chart README](helm/README.md) for instructions on how to deploy Nextflow-API to a Kubernetes cluster. 25 | 26 | ## Usage 27 | 28 | The core of Nextflow-API is a REST API which provides an interface for running Nextflow pipelines and can be integrated with third-party services. Nextflow-API provides a collection of [CLI scripts](cli) that demonstrate how to use the API, as well as a web interface for end users. 29 | 30 | ### Backends 31 | 32 | Nextflow-API stores workflow runs and tasks in one of several "backend" formats. The `file` backend stores the data in a single `pkl` file, which is ideal for local testing. The `mongo` backend stores the data in a MongoDB database, which is ideal for production. 
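The backend and its connection URL are selected with command-line options when `bin/server.py` is started (the startup scripts wrap this step). A minimal sketch, assuming the Python dependencies are installed and, for the second case, that a MongoDB instance is reachable; the flag names and default values come from the `tornado.options` definitions in `bin/server.py`:

```bash
# file backend: store runs and tasks in a single pickle file (good for local testing)
python3 bin/server.py --backend=file --url-file=db.pkl --port=8080

# mongo backend: store runs and tasks in MongoDB (good for production);
# --np controls the number of server processes
python3 bin/server.py --backend=mongo --url-mongo=localhost --port=8080 --np=4
```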
33 | 34 | ### API Endpoints 35 | 36 | | Endpoint | Method | Description | 37 | |--------------------------------|--------|---------------------------------------------| 38 | | `/api/workflows` | GET | List all workflow instances | 39 | | `/api/workflows` | POST | Create a workflow instance | 40 | | `/api/workflows/{id}` | GET | Get a workflow instance | 41 | | `/api/workflows/{id}` | POST | Update a workflow instance | 42 | | `/api/workflows/{id}` | DELETE | Delete a workflow instance | 43 | | `/api/workflows/{id}/upload` | POST | Upload input files to a workflow instance | 44 | | `/api/workflows/{id}/launch` | POST | Launch a workflow instance | 45 | | `/api/workflows/{id}/log` | GET | Get the log of a workflow instance | 46 | | `/api/workflows/{id}/download` | GET | Download the output data as a tarball | 47 | | `/api/tasks` | GET | List all tasks | 48 | | `/api/tasks` | POST | Save a task (used by Nextflow) | 49 | 50 | ### Lifecycle 51 | 52 | First, the user calls the API to create a workflow instance. Along with the API call, the user must provide the __name of the Nextflow pipeline__. The payload of the API call is shown below. 53 | 54 | ```json 55 | { 56 | "pipeline": "systemsgenetics/kinc-nf" 57 | } 58 | ``` 59 | 60 | Then the user uploads the input files (including `nextflow.config`) for the workflow instance. 61 | 62 | After the input and config files are in place, the user can launch the workflow. The launch begins by staging the input files to `/input` on the PVC; the jobs, which run as distributed pods in Kubernetes, read their input data from there and work together in the workflow instance's dedicated workspace. 63 | 64 | Once the workflow is launched, the status and log are available via the API. Higher-level services can call the API periodically to fetch the latest log of the workflow instance. 65 | 66 | After the run is done, the user can call the API to download the output files. The output files are placed in `/output` on the PVC, and the API compresses the directory into a `tar.gz` file for download. 67 | 68 | Once done with the workflow instance, the user can call the API to delete it and purge its data. 69 | 70 | ### Resource Usage Monitoring and Prediction 71 | 72 | Nextflow-API automatically collects resource usage data generated by Nextflow, including metrics like runtime, CPU utilization, memory usage, and bytes read/written. Through the web interface you can download this data as CSV files, create visualizations, and train prediction models for specific pipelines and processes. These features were adapted from [tesseract](https://github.com/bentsherman/tesseract), a command-line tool for resource prediction. 
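The [CLI scripts](cli) wrap the endpoints listed above. The sketch below walks through the same lifecycle with plain `curl` against a local deployment; the host/port, the use of `jq` to parse the response, and the file names are illustrative assumptions:

```bash
# create a workflow instance and capture its id
ID=$(curl -s -X POST http://localhost:8080/api/workflows \
     -d '{"pipeline": "systemsgenetics/kinc-nf"}' | jq -r '._id')

# upload input files (including nextflow.config), then launch the workflow
curl -s -X POST -F "file=@nextflow.config" "http://localhost:8080/api/workflows/$ID/upload"
curl -s -X POST "http://localhost:8080/api/workflows/$ID/launch"

# poll the status and log while the workflow runs
curl -s "http://localhost:8080/api/workflows/$ID/log"

# once the run completes, download the output archive
curl -s -o output.tar.gz "http://localhost:8080/api/workflows/$ID/download"
```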
-------------------------------------------------------------------------------- /bin/backend.py: -------------------------------------------------------------------------------- 1 | import motor.motor_tornado 2 | import multiprocessing as mp 3 | import pickle 4 | import pymongo 5 | 6 | 7 | 8 | class Backend(): 9 | def __init__(self): 10 | pass 11 | 12 | def initialize(self): 13 | pass 14 | 15 | async def workflow_query(self, page, page_size): 16 | raise NotImplementedError() 17 | 18 | async def workflow_create(self, workflow): 19 | raise NotImplementedError() 20 | 21 | async def workflow_get(self, id): 22 | raise NotImplementedError() 23 | 24 | async def workflow_update(self, id, workflow): 25 | raise NotImplementedError() 26 | 27 | async def workflow_delete(self, id): 28 | raise NotImplementedError() 29 | 30 | async def task_query(self, page, page_size): 31 | raise NotImplementedError() 32 | 33 | async def task_create(self, task): 34 | raise NotImplementedError() 35 | 36 | async def task_get(self, id): 37 | raise NotImplementedError() 38 | 39 | 40 | 41 | class FileBackend(Backend): 42 | 43 | def __init__(self, url): 44 | self._lock = mp.Lock() 45 | self._url = url 46 | self.initialize() 47 | 48 | def initialize(self, error_not_found=False): 49 | # load database from pickle file 50 | try: 51 | self.load() 52 | 53 | # initialize empty database if pickle file doesn't exist 54 | except FileNotFoundError: 55 | self._db = { 56 | 'workflows': [], 57 | 'tasks': [] 58 | } 59 | self.save() 60 | 61 | def load(self): 62 | self._db = pickle.load(open(self._url, 'rb')) 63 | 64 | def save(self): 65 | pickle.dump(self._db, open(self._url, 'wb')) 66 | 67 | async def workflow_query(self, page, page_size): 68 | self._lock.acquire() 69 | self.load() 70 | 71 | # sort workflows by date_created in descending order 72 | self._db['workflows'].sort(key=lambda w: w['date_created'], reverse=True) 73 | 74 | # return the specified page of workflows 75 | workflows = self._db['workflows'][(page * page_size) : ((page + 1) * page_size)] 76 | 77 | self._lock.release() 78 | 79 | return workflows 80 | 81 | async def workflow_create(self, workflow): 82 | self._lock.acquire() 83 | self.load() 84 | 85 | # append workflow to list of workflows 86 | self._db['workflows'].append(workflow) 87 | 88 | self.save() 89 | self._lock.release() 90 | 91 | async def workflow_get(self, id): 92 | self._lock.acquire() 93 | self.load() 94 | 95 | # search for workflow by id 96 | workflow = None 97 | 98 | for w in self._db['workflows']: 99 | if w['_id'] == id: 100 | workflow = w 101 | break 102 | 103 | self._lock.release() 104 | 105 | # return workflow or raise error if workflow wasn't found 106 | if workflow != None: 107 | return workflow 108 | else: 109 | raise IndexError('Workflow was not found') 110 | 111 | async def workflow_update(self, id, workflow): 112 | self._lock.acquire() 113 | self.load() 114 | 115 | # search for workflow by id and update it 116 | found = False 117 | 118 | for i, w in enumerate(self._db['workflows']): 119 | if w['_id'] == id: 120 | # update workflow 121 | self._db['workflows'][i] = workflow 122 | found = True 123 | break 124 | 125 | self.save() 126 | self._lock.release() 127 | 128 | # raise error if workflow wasn't found 129 | if not found: 130 | raise IndexError('Workflow was not found') 131 | 132 | async def workflow_delete(self, id): 133 | self._lock.acquire() 134 | self.load() 135 | 136 | # search for workflow by id and delete it 137 | found = False 138 | 139 | for i, w in enumerate(self._db['workflows']): 140 | 
if w['_id'] == id: 141 | # delete workflow 142 | self._db['workflows'].pop(i) 143 | found = True 144 | break 145 | 146 | self.save() 147 | self._lock.release() 148 | 149 | # raise error if workflow wasn't found 150 | if not found: 151 | raise IndexError('Workflow was not found') 152 | 153 | async def task_query(self, page, page_size): 154 | self._lock.acquire() 155 | self.load() 156 | 157 | # sort tasks by date_created in descending order 158 | self._db['tasks'].sort(key=lambda t: t['utcTime'], reverse=True) 159 | 160 | # return the specified page of workflows 161 | tasks = self._db['tasks'][(page * page_size) : ((page + 1) * page_size)] 162 | 163 | self._lock.release() 164 | 165 | return tasks 166 | 167 | async def task_query_pipelines(self): 168 | self._lock.acquire() 169 | self.load() 170 | 171 | # extract list of unique pipelines from all 'started' events 172 | pipelines = [t['metadata']['workflow']['projectName'] for t in self._db['tasks'] if t['event'] == 'started'] 173 | pipelines = list(set(pipelines)) 174 | 175 | self._lock.release() 176 | 177 | return pipelines 178 | 179 | async def task_query_pipeline(self, pipeline): 180 | self._lock.acquire() 181 | self.load() 182 | 183 | # find all runs of the given pipeline 184 | run_ids = [t['runId'] for t in self._db['tasks'] if t['event'] == 'started' and t['metadata']['workflow']['projectName'] == pipeline] 185 | 186 | # find all tasks associated with the given runs 187 | tasks = [t for t in self._db['tasks'] if t['event'] == 'process_completed' and t['runId'] in run_ids] 188 | 189 | self._lock.release() 190 | 191 | return tasks 192 | 193 | async def task_create(self, task): 194 | self._lock.acquire() 195 | self.load() 196 | 197 | # append workflow to list of workflows 198 | self._db['tasks'].append(task) 199 | 200 | self.save() 201 | self._lock.release() 202 | 203 | async def task_get(self, id): 204 | self._lock.acquire() 205 | self.load() 206 | 207 | # search for task by id 208 | task = None 209 | 210 | for t in self._db['tasks']: 211 | if t['_id'] == id: 212 | task = t 213 | break 214 | 215 | self._lock.release() 216 | 217 | # raise error if task wasn't found 218 | if task != None: 219 | return task 220 | else: 221 | raise IndexError('Task was not found') 222 | 223 | 224 | 225 | class MongoBackend(Backend): 226 | def __init__(self, url): 227 | self._url = url 228 | self.initialize() 229 | 230 | def initialize(self): 231 | self._client = motor.motor_tornado.MotorClient(self._url) 232 | self._db = self._client['nextflow_api'] 233 | 234 | async def workflow_query(self, page, page_size): 235 | return await self._db.workflows \ 236 | .find() \ 237 | .sort('date_created', pymongo.DESCENDING) \ 238 | .skip(page * page_size) \ 239 | .to_list(length=page_size) 240 | 241 | async def workflow_create(self, workflow): 242 | return await self._db.workflows.insert_one(workflow) 243 | 244 | async def workflow_get(self, id): 245 | return await self._db.workflows.find_one({ '_id': id }) 246 | 247 | async def workflow_update(self, id, workflow): 248 | return await self._db.workflows.replace_one({ '_id': id }, workflow) 249 | 250 | async def workflow_delete(self, id): 251 | return await self._db.workflows.delete_one({ '_id': id }) 252 | 253 | async def task_query(self, page, page_size): 254 | return await self._db.tasks \ 255 | .find({}, { '_id': 1, 'runName': 1, 'utcTime': 1, 'event': 1 }) \ 256 | .sort('utcTime', pymongo.DESCENDING) \ 257 | .skip(page * page_size) \ 258 | .to_list(length=page_size) 259 | 260 | async def task_query_pipelines(self): 261 
| # find all 'started' events 262 | tasks = await self._db.tasks \ 263 | .find({ 'event': 'started' }, { 'metadata.workflow.projectName': 1 }) \ 264 | .to_list(length=None) 265 | 266 | # extract list of unique pipelines 267 | pipelines = [t['metadata']['workflow']['projectName'] for t in tasks] 268 | pipelines = list(set(pipelines)) 269 | 270 | return pipelines 271 | 272 | async def task_query_pipeline(self, pipeline): 273 | # find all runs of the given pipeline 274 | runs = await self._db.tasks \ 275 | .find({ 'event': 'started', 'metadata.workflow.projectName': pipeline }, { 'runId': 1 }) \ 276 | .to_list(length=None) 277 | 278 | run_ids = [run['runId'] for run in runs] 279 | 280 | # find all tasks associated with the given runs 281 | tasks = await self._db.tasks \ 282 | .find({ 'event': 'process_completed', 'runId': { '$in': run_ids } }) \ 283 | .to_list(length=None) 284 | 285 | return tasks 286 | 287 | async def task_create(self, task): 288 | return await self._db.tasks.insert_one(task) 289 | 290 | async def task_get(self, id): 291 | return await self._db.tasks.find_one({ '_id': id }) 292 | -------------------------------------------------------------------------------- /bin/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # load settings from environment variables 4 | NXF_EXECUTOR = os.environ.get('NXF_EXECUTOR', default='local') 5 | PVC_NAME = os.environ.get('PVC_NAME') 6 | 7 | # define working directories 8 | BASE_DIRS = { 9 | 'k8s': '/workspace', 10 | 'local': '.', 11 | 'pbspro': '.' 12 | } 13 | BASE_DIR = BASE_DIRS[NXF_EXECUTOR] 14 | 15 | MODELS_DIR = os.path.join(BASE_DIR, '_models') 16 | TRACE_DIR = os.path.join(BASE_DIR, '_trace') 17 | WORKFLOWS_DIR = os.path.join(BASE_DIR, '_workflows') 18 | 19 | # validate environment settings 20 | if NXF_EXECUTOR == 'k8s' and PVC_NAME is None: 21 | raise EnvironmentError('Using k8s executor but PVC is not defined') -------------------------------------------------------------------------------- /bin/model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import dill as pickle 3 | import forestci 4 | import h5py 5 | import io 6 | import json 7 | import numpy as np 8 | import pandas as pd 9 | import scipy.stats 10 | import sklearn.base 11 | import sklearn.dummy 12 | import sklearn.ensemble 13 | import sklearn.metrics 14 | import sklearn.model_selection 15 | import sklearn.pipeline 16 | import sklearn.preprocessing 17 | from tensorflow import keras 18 | 19 | import env 20 | 21 | 22 | 23 | def check_std(y_pred): 24 | if isinstance(y_pred, tuple): 25 | return y_pred 26 | else: 27 | return y_pred, np.zeros_like(y_pred) 28 | 29 | 30 | 31 | def predict_intervals(y_bar, y_std, ci=0.95): 32 | # compute z score 33 | _, n_stds = scipy.stats.norm.interval(ci) 34 | 35 | # compute intervals 36 | y_lower = y_bar - n_stds * y_std 37 | y_upper = y_bar + n_stds * y_std 38 | 39 | return y_lower, y_upper 40 | 41 | 42 | 43 | class KerasRegressor(keras.wrappers.scikit_learn.KerasRegressor): 44 | 45 | def __getstate__(self): 46 | state = self.__dict__ 47 | if 'model' in state: 48 | model = state['model'] 49 | model_hdf5_bio = io.BytesIO() 50 | with h5py.File(model_hdf5_bio, mode='w') as file: 51 | model.save(file) 52 | state['model'] = model_hdf5_bio 53 | state_copy = copy.deepcopy(state) 54 | state['model'] = model 55 | return state_copy 56 | else: 57 | return state 58 | 59 | def __setstate__(self, state): 60 | if 'model' in state: 61 | 
model_hdf5_bio = state['model'] 62 | with h5py.File(model_hdf5_bio, mode='r') as file: 63 | state['model'] = keras.models.load_model(file) 64 | self.__dict__ = state 65 | 66 | def predict(self, x): 67 | return np.squeeze(self.model(x)) 68 | 69 | 70 | 71 | class KerasRegressorWithIntervals(KerasRegressor): 72 | 73 | def inverse_tau(self, N, lmbda=1e-5, p_dropout=0.1, ls_2=0.005): 74 | return (2 * N * lmbda) / (1 - p_dropout) / ls_2 75 | 76 | def fit(self, X, y): 77 | # fit neural network 78 | history = super(KerasRegressorWithIntervals, self).fit(X, y) 79 | 80 | # save training set size for tau adjustment 81 | self.n_train_samples = X.shape[0] 82 | 83 | return history 84 | 85 | def predict(self, X, n_preds=10): 86 | # compute several predictions for each sample 87 | y_preds = np.array([super(KerasRegressorWithIntervals, self).predict(X) for _ in range(n_preds)]) 88 | 89 | # compute tau adjustment 90 | tau_inv = self.inverse_tau(self.n_train_samples) 91 | 92 | # compute mean and variance 93 | y_bar = np.mean(y_preds, axis=0) 94 | y_std = np.std(y_preds, axis=0) + tau_inv 95 | 96 | return y_bar, y_std 97 | 98 | 99 | 100 | class RandomForestRegressorWithIntervals(sklearn.ensemble.RandomForestRegressor): 101 | 102 | def fit(self, X, y): 103 | # fit random forest 104 | super(RandomForestRegressorWithIntervals, self).fit(X, y) 105 | 106 | # save training set for variance estimate 107 | self.X_train = X 108 | 109 | return self 110 | 111 | def predict(self, X): 112 | # compute predictions 113 | y_bar = super(RandomForestRegressorWithIntervals, self).predict(X) 114 | 115 | # compute variance estimate 116 | y_var = forestci.random_forest_error(self, self.X_train, X) 117 | y_std = np.sqrt(y_var) 118 | 119 | return y_bar, y_std 120 | 121 | 122 | 123 | def select_rows_by_values(df, column, values): 124 | return pd.concat([df[df[column].astype(str) == v] for v in values]) 125 | 126 | 127 | 128 | def is_categorical(df, column): 129 | return column != None and df[column].dtype.kind in 'OSUV' 130 | 131 | 132 | 133 | def create_dataset(df, inputs, target=None): 134 | # extract input/target data from trace data 135 | X = df[inputs] 136 | y = df[target].values if target != None else None 137 | 138 | # one-hot encode categorical inputs, save categories 139 | options = {column: None for column in inputs} 140 | 141 | for column in inputs: 142 | if is_categorical(X, column): 143 | options[column] = X[column].unique().tolist() 144 | X = pd.get_dummies(X, columns=[column], drop_first=False) 145 | 146 | # save column order 147 | columns = list(X.columns) 148 | 149 | return X.values, y, columns, options 150 | 151 | 152 | 153 | def create_dummy(): 154 | return sklearn.dummy.DummyRegressor(strategy='quantile', quantile=1.0) 155 | 156 | 157 | 158 | def create_mlp( 159 | input_shape, 160 | hidden_layer_sizes=[], 161 | activation='relu', 162 | activation_target=None, 163 | l1=0, 164 | l2=1e-5, 165 | p_dropout=0.1, 166 | intervals=False, 167 | optimizer='adam', # lr=0.001 168 | loss='mean_absolute_error', 169 | epochs=200): 170 | 171 | def build_fn(): 172 | # create a 3-layer neural network 173 | x_input = keras.Input(shape=input_shape) 174 | 175 | x = x_input 176 | for units in hidden_layer_sizes: 177 | x = keras.layers.Dense( 178 | units=units, 179 | activation=activation, 180 | kernel_regularizer=keras.regularizers.l1_l2(l1, l2), 181 | bias_regularizer=keras.regularizers.l1_l2(l1, l2) 182 | )(x) 183 | 184 | if p_dropout != None: 185 | training = True if intervals else None 186 | x = keras.layers.Dropout(p_dropout)(x, 
training=training) 187 | 188 | y_output = keras.layers.Dense(units=1, activation=activation_target)(x) 189 | 190 | mlp = keras.models.Model(x_input, y_output) 191 | 192 | # compile the model 193 | mlp.compile(optimizer=optimizer, loss=loss) 194 | 195 | return mlp 196 | 197 | if intervals: 198 | Regressor = KerasRegressorWithIntervals 199 | else: 200 | Regressor = KerasRegressor 201 | 202 | return Regressor( 203 | build_fn=build_fn, 204 | batch_size=32, 205 | epochs=epochs, 206 | verbose=False, 207 | validation_split=0.1 208 | ) 209 | 210 | 211 | 212 | def create_rf(criterion='mae', intervals=False): 213 | if intervals: 214 | Regressor = RandomForestRegressorWithIntervals 215 | else: 216 | Regressor = sklearn.ensemble.RandomForestRegressor 217 | 218 | return Regressor(n_estimators=100, criterion=criterion) 219 | 220 | 221 | 222 | def create_pipeline(reg, scaler_fn=sklearn.preprocessing.MaxAbsScaler): 223 | return sklearn.pipeline.Pipeline([ 224 | ('scaler', scaler_fn()), 225 | ('reg', reg) 226 | ]) 227 | 228 | 229 | 230 | def mean_absolute_percentage_error(y_true, y_pred): 231 | y_true = np.array(y_true) 232 | y_pred = np.array(y_pred) 233 | return 100 * np.mean(np.abs((y_true - y_pred) / y_true)) 234 | 235 | 236 | 237 | def prediction_interval_coverage(y_true, y_lower, y_upper): 238 | return 100 * np.mean((y_lower <= y_true) & (y_true <= y_upper)) 239 | 240 | 241 | 242 | def evaluate_cv(model, X, y, cv=5, ci=0.95): 243 | # initialize prediction arrays 244 | y_bar = np.empty_like(y) 245 | y_std = np.empty_like(y) 246 | 247 | # perform k-fold cross validation 248 | kfold = sklearn.model_selection.KFold(n_splits=cv, shuffle=True) 249 | 250 | for train_index, test_index in kfold.split(X): 251 | # reset session (for keras models) 252 | keras.backend.clear_session() 253 | 254 | # extract train/test split 255 | X_train, X_test = X[train_index], X[test_index] 256 | y_train, y_test = y[train_index], y[test_index] 257 | 258 | # train model 259 | model_ = sklearn.base.clone(model) 260 | model_.fit(X_train, y_train) 261 | 262 | # get model predictions 263 | y_bar_i, y_std_i = check_std(model_.predict(X_test)) 264 | 265 | y_bar[test_index] = y_bar_i 266 | y_std[test_index] = y_std_i 267 | 268 | # compute prediction intervals 269 | y_lower, y_upper = predict_intervals(y_bar, y_std, ci=ci) 270 | 271 | # evaluate predictions 272 | scores = { 273 | 'mpe': mean_absolute_percentage_error(y, y_bar), 274 | 'cov': prediction_interval_coverage(y, y_lower, y_upper) 275 | } 276 | 277 | return scores, y_bar, y_std 278 | 279 | 280 | 281 | def train(df, args): 282 | defaults = { 283 | 'selectors': [], 284 | 'min_std': 0.1, 285 | 'scaler': 'maxabs', 286 | 'model_type': 'mlp', 287 | 'hidden_layer_sizes': [128, 128, 128], 288 | 'epochs': 200, 289 | 'intervals': True 290 | } 291 | 292 | args = {**defaults, **args} 293 | 294 | # apply selectorss to dataframe 295 | for selector in args['selectors']: 296 | # parse column and selected values 297 | column, values = selector.split('=') 298 | values = values.split(',') 299 | 300 | # select rows from dataframe 301 | if values != None and len(values) > 0: 302 | df = select_rows_by_values(df, column, values) 303 | 304 | # extract input/output data from trace data 305 | try: 306 | X, y, columns, options = create_dataset(df, args['inputs'], args['target']) 307 | except: 308 | raise RuntimeError('error: one or more input/output variables are not in the dataset') 309 | 310 | # select scaler 311 | try: 312 | scalers = { 313 | 'maxabs': sklearn.preprocessing.MaxAbsScaler, 314 | 
'minmax': sklearn.preprocessing.MinMaxScaler, 315 | 'standard': sklearn.preprocessing.StandardScaler 316 | } 317 | Scaler = scalers[args['scaler']] 318 | except: 319 | raise RuntimeError('error: scaler %s not recognized' % (args['scaler'])) 320 | 321 | # use dummy regressor if target data has low variance 322 | if y.std() < args['min_std']: 323 | print('target value has low variance, using max value rounded up') 324 | model_type = 'dummy' 325 | else: 326 | model_type = args['model_type'] 327 | 328 | # create regressor 329 | if model_type == 'dummy': 330 | reg = create_dummy() 331 | 332 | elif model_type == 'mlp': 333 | reg = create_mlp( 334 | X.shape[1], 335 | hidden_layer_sizes=args['hidden_layer_sizes'], 336 | epochs=args['epochs'], 337 | intervals=args['intervals']) 338 | 339 | elif model_type == 'rf': 340 | reg = create_rf(intervals=args['intervals']) 341 | 342 | # create model 343 | model = create_pipeline(reg, scaler_fn=Scaler) 344 | 345 | # save order of input columns 346 | args['inputs'] = options 347 | args['columns'] = columns 348 | 349 | # train and evaluate model 350 | scores, _, _ = evaluate_cv(model, X, y) 351 | 352 | # train model on full dataset 353 | model.fit(X, y) 354 | 355 | # workaround for keras models 356 | try: 357 | model.named_steps['regressor'].build_fn = None 358 | except: 359 | pass 360 | 361 | # save model to file 362 | f = open('%s/%s.pkl' % (env.MODELS_DIR, args['model_name']), 'wb') 363 | pickle.dump(model, f) 364 | 365 | # save args to file 366 | f = open('%s/%s.json' % (env.MODELS_DIR, args['model_name']), 'w') 367 | json.dump(args, f) 368 | 369 | # return results 370 | y_bar, y_std = check_std(model.predict(X)) 371 | 372 | return { 373 | 'y_true': y, 374 | 'y_pred': y_bar, 375 | 'mpe': scores['mpe'], 376 | 'cov': scores['cov'] 377 | } 378 | 379 | 380 | 381 | def predict(model_name, inputs, ci=0.95): 382 | # load model 383 | f = open('%s/%s.pkl' % (env.MODELS_DIR, model_name), 'rb') 384 | model = pickle.load(f) 385 | 386 | # load model configuration 387 | f = open('%s/%s.json' % (env.MODELS_DIR, model_name), 'r') 388 | args = json.load(f) 389 | 390 | # convert inputs into an ordered vector 391 | x_input = {} 392 | 393 | for column, options in args['inputs'].items(): 394 | # one-hot encode categorical inputs 395 | if options != None: 396 | for v in options: 397 | x_input['%s_%s' % (column, v)] = (inputs[column] == v) 398 | 399 | # copy numerical inputs directly 400 | else: 401 | x_input[column] = inputs[column] 402 | 403 | x_input = [float(x_input[c]) for c in args['columns']] 404 | 405 | # perform inference 406 | X = np.array([x_input]) 407 | y_bar, y_std = check_std(model.predict(X)) 408 | y_lower, y_upper = predict_intervals(y_bar, y_std, ci=ci) 409 | 410 | # return results 411 | return { 412 | args['target']: [float(y_lower), float(y_bar), float(y_upper)] 413 | } -------------------------------------------------------------------------------- /bin/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import base64 4 | import bson 5 | import json 6 | import multiprocessing as mp 7 | import os 8 | import pandas as pd 9 | import shutil 10 | import socket 11 | import subprocess 12 | import time 13 | import tornado 14 | import tornado.escape 15 | import tornado.httpserver 16 | import tornado.ioloop 17 | import tornado.options 18 | import tornado.web 19 | 20 | import backend 21 | import env 22 | import model as Model 23 | import visualizer as Visualizer 24 | import workflow as Workflow 
25 | 26 | 27 | 28 | def list_dir_recursive(path, relpath_start=''): 29 | files = [os.path.join(dir, f) for (dir, subdirs, filenames) in os.walk(path) for f in filenames] 30 | files = [os.path.relpath(f, start=relpath_start) for f in files] 31 | files.sort() 32 | 33 | return files 34 | 35 | 36 | 37 | def message(status, message): 38 | return { 39 | 'status': status, 40 | 'message': message 41 | } 42 | 43 | 44 | 45 | class WorkflowQueryHandler(tornado.web.RequestHandler): 46 | 47 | async def get(self): 48 | page = int(self.get_query_argument('page', 0)) 49 | page_size = int(self.get_query_argument('page_size', 100)) 50 | 51 | db = self.settings['db'] 52 | workflows = await db.workflow_query(page, page_size) 53 | 54 | self.set_status(200) 55 | self.set_header('content-type', 'application/json') 56 | self.write(tornado.escape.json_encode(workflows)) 57 | 58 | 59 | 60 | class WorkflowCreateHandler(tornado.web.RequestHandler): 61 | 62 | REQUIRED_KEYS = set([ 63 | 'pipeline' 64 | ]) 65 | 66 | DEFAULTS = { 67 | 'name': '', 68 | 'params_format': '', 69 | 'params_data': '', 70 | 'profiles': 'standard', 71 | 'revision': 'master', 72 | 'input_dir': 'input', 73 | 'output_dir': 'output', 74 | 'attempts': 0 75 | } 76 | 77 | def get(self): 78 | workflow = {**self.DEFAULTS, **{ '_id': '0' }} 79 | 80 | self.set_status(200) 81 | self.set_header('content-type', 'application/json') 82 | self.write(tornado.escape.json_encode(workflow)) 83 | 84 | async def post(self): 85 | db = self.settings['db'] 86 | 87 | # make sure request body is valid 88 | try: 89 | data = tornado.escape.json_decode(self.request.body) 90 | missing_keys = self.REQUIRED_KEYS - data.keys() 91 | except json.JSONDecodeError: 92 | self.set_status(422) 93 | self.write(message(422, 'Ill-formatted JSON')) 94 | return 95 | 96 | if missing_keys: 97 | self.set_status(400) 98 | self.write(message(400, 'Missing required field(s): %s' % list(missing_keys))) 99 | return 100 | 101 | # create workflow 102 | workflow = {**self.DEFAULTS, **data, **{ 'status': 'nascent' }} 103 | workflow['_id'] = str(bson.ObjectId()) 104 | 105 | # append creation timestamp to workflow 106 | workflow['date_created'] = int(time.time() * 1000) 107 | 108 | # transform pipeline name to lowercase 109 | workflow['pipeline'] = workflow['pipeline'].lower() 110 | 111 | # save workflow 112 | await db.workflow_create(workflow) 113 | 114 | # create workflow directory 115 | workflow_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id']) 116 | os.makedirs(workflow_dir) 117 | 118 | self.set_status(200) 119 | self.set_header('content-type', 'application/json') 120 | self.write(tornado.escape.json_encode({ '_id': workflow['_id'] })) 121 | 122 | 123 | 124 | 125 | class WorkflowEditHandler(tornado.web.RequestHandler): 126 | 127 | REQUIRED_KEYS = set([ 128 | 'pipeline' 129 | ]) 130 | 131 | DEFAULTS = { 132 | 'name': '', 133 | 'params_format': '', 134 | 'params_data': '', 135 | 'profiles': 'standard', 136 | 'revision': 'master', 137 | 'input_dir': 'input', 138 | 'output_dir': 'output', 139 | 'attempts': 0 140 | } 141 | 142 | async def get(self, id): 143 | db = self.settings['db'] 144 | 145 | try: 146 | # get workflow 147 | workflow = await db.workflow_get(id) 148 | 149 | # append list of input files 150 | workflow_dir = os.path.join(env.WORKFLOWS_DIR, id) 151 | input_dir = os.path.join(workflow_dir, workflow['input_dir']) 152 | output_dir = os.path.join(workflow_dir, workflow['output_dir']) 153 | 154 | if os.path.exists(input_dir): 155 | workflow['input_files'] = list_dir_recursive(input_dir, 
relpath_start=workflow_dir) 156 | else: 157 | workflow['input_files'] = [] 158 | 159 | # append list of output files 160 | if os.path.exists(output_dir): 161 | workflow['output_files'] = list_dir_recursive(output_dir, relpath_start=workflow_dir) 162 | else: 163 | workflow['output_files'] = [] 164 | 165 | # append status of output data 166 | workflow['output_data'] = os.path.exists('%s/%s-output.tar.gz' % (workflow_dir, id)) 167 | 168 | self.set_status(200) 169 | self.set_header('content-type', 'application/json') 170 | self.write(tornado.escape.json_encode(workflow)) 171 | except: 172 | self.set_status(404) 173 | self.write(message(404, 'Failed to get workflow \"%s\"' % id)) 174 | 175 | async def post(self, id): 176 | db = self.settings['db'] 177 | 178 | # make sure request body is valid 179 | try: 180 | data = tornado.escape.json_decode(self.request.body) 181 | missing_keys = self.REQUIRED_KEYS - data.keys() 182 | except json.JSONDecodeError: 183 | self.set_status(422) 184 | self.write(message(422, 'Ill-formatted JSON')) 185 | 186 | if missing_keys: 187 | self.set_status(400) 188 | self.write(message(400, 'Missing required field(s): %s' % list(missing_keys))) 189 | return 190 | 191 | try: 192 | # update workflow from request body 193 | workflow = await db.workflow_get(id) 194 | workflow = {**self.DEFAULTS, **workflow, **data} 195 | 196 | # transform pipeline name to lowercase 197 | workflow['pipeline'] = workflow['pipeline'].lower() 198 | 199 | # save workflow 200 | await db.workflow_update(id, workflow) 201 | 202 | self.set_status(200) 203 | self.set_header('content-type', 'application/json') 204 | self.write(tornado.escape.json_encode({ '_id': id })) 205 | except: 206 | self.set_status(404) 207 | self.write(message(404, 'Failed to update workflow \"%s\"' % id)) 208 | 209 | async def delete(self, id): 210 | db = self.settings['db'] 211 | 212 | try: 213 | # delete workflow 214 | await db.workflow_delete(id) 215 | 216 | # delete workflow directory 217 | shutil.rmtree(os.path.join(env.WORKFLOWS_DIR, id), ignore_errors=True) 218 | 219 | self.set_status(200) 220 | self.write(message(200, 'Workflow \"%s\" was deleted' % id)) 221 | except: 222 | self.set_status(404) 223 | self.write(message(404, 'Failed to delete workflow \"%s\"' % id)) 224 | 225 | 226 | 227 | 228 | class WorkflowUploadHandler(tornado.web.RequestHandler): 229 | 230 | async def post(self, id): 231 | db = self.settings['db'] 232 | 233 | # make sure request body contains files 234 | files = self.request.files 235 | 236 | if not files: 237 | self.set_status(400) 238 | self.write(message(400, 'No files were uploaded')) 239 | return 240 | 241 | # get workflow 242 | workflow = await db.workflow_get(id) 243 | 244 | # initialize input directory 245 | input_dir = os.path.join(env.WORKFLOWS_DIR, id, workflow['input_dir']) 246 | os.makedirs(input_dir, exist_ok=True) 247 | 248 | # save uploaded files to input directory 249 | filenames = [] 250 | 251 | for f_list in files.values(): 252 | for f_arg in f_list: 253 | filename, body = f_arg['filename'], f_arg['body'] 254 | with open(os.path.join(input_dir, filename), 'wb') as f: 255 | f.write(body) 256 | filenames.append(filename) 257 | 258 | self.set_status(200) 259 | self.write(message(200, 'File \"%s\" was uploaded for workflow \"%s\" successfully' % (filenames, id))) 260 | 261 | 262 | 263 | class WorkflowLaunchHandler(tornado.web.RequestHandler): 264 | 265 | resume = False 266 | 267 | async def post(self, id): 268 | db = self.settings['db'] 269 | 270 | try: 271 | # get workflow 272 | 
workflow = await db.workflow_get(id) 273 | 274 | # make sure workflow is not already running 275 | if workflow['status'] == 'running': 276 | self.set_status(400) 277 | self.write(message(400, 'Workflow \"%s\" is already running' % id)) 278 | return 279 | 280 | # copy nextflow.config from input directory if it exists 281 | workflow_dir = os.path.join(env.WORKFLOWS_DIR, id) 282 | input_dir = os.path.join(workflow_dir, workflow['input_dir']) 283 | src = os.path.join(input_dir, 'nextflow.config') 284 | dst = os.path.join(workflow_dir, 'nextflow.config') 285 | 286 | if os.path.exists(dst): 287 | os.remove(dst) 288 | 289 | if os.path.exists(src): 290 | shutil.copyfile(src, dst) 291 | 292 | # append additional settings to nextflow.config 293 | with open(dst, 'a') as f: 294 | weblog_url = 'http://%s:%d/api/tasks' % (socket.gethostbyname(socket.gethostname()), tornado.options.options.port) 295 | f.write('weblog { enabled = true\n url = \"%s\" }\n' % (weblog_url)) 296 | f.write('k8s { launchDir = \"%s\" }\n' % (workflow_dir)) 297 | 298 | # update workflow status 299 | workflow['status'] = 'running' 300 | workflow['date_submitted'] = int(time.time() * 1000) 301 | workflow['attempts'] += 1 302 | 303 | await db.workflow_update(id, workflow) 304 | 305 | # launch workflow as a child process 306 | p = mp.Process(target=Workflow.launch, args=(db, workflow, self.resume)) 307 | p.start() 308 | 309 | self.set_status(200) 310 | self.write(message(200, 'Workflow \"%s\" was launched' % id)) 311 | except: 312 | self.set_status(404) 313 | self.write(message(404, 'Failed to launch workflow \"%s\"' % id)) 314 | 315 | 316 | 317 | class WorkflowResumeHandler(WorkflowLaunchHandler): 318 | 319 | resume = True 320 | 321 | 322 | 323 | class WorkflowCancelHandler(tornado.web.RequestHandler): 324 | 325 | async def post(self, id): 326 | db = self.settings['db'] 327 | 328 | try: 329 | # get workflow 330 | workflow = await db.workflow_get(id) 331 | workflow = {**{ 'pid': -1 }, **workflow} 332 | 333 | # cancel workflow 334 | Workflow.cancel(workflow) 335 | 336 | # update workflow status 337 | workflow['status'] = 'failed' 338 | workflow['pid'] = -1 339 | 340 | await db.workflow_update(id, workflow) 341 | 342 | self.set_status(200) 343 | self.write(message(200, 'Workflow \"%s\" was canceled' % id)) 344 | except: 345 | self.set_status(404) 346 | self.write(message(404, 'Failed to cancel workflow \"%s\"' % id)) 347 | 348 | 349 | 350 | class WorkflowLogHandler(tornado.web.RequestHandler): 351 | 352 | async def get(self, id): 353 | db = self.settings['db'] 354 | 355 | try: 356 | # get workflow 357 | workflow = await db.workflow_get(id) 358 | 359 | # append log if it exists 360 | log_file = os.path.join(env.WORKFLOWS_DIR, id, '.workflow.log') 361 | 362 | if os.path.exists(log_file): 363 | f = open(log_file) 364 | log = ''.join(f.readlines()) 365 | else: 366 | log = '' 367 | 368 | # construct response data 369 | data = { 370 | '_id': id, 371 | 'status': workflow['status'], 372 | 'attempts': workflow['attempts'], 373 | 'log': log 374 | } 375 | 376 | self.set_status(200) 377 | self.set_header('content-type', 'application/json') 378 | self.set_header('cache-control', 'no-store, no-cache, must-revalidate, max-age=0') 379 | self.write(tornado.escape.json_encode(data)) 380 | except: 381 | self.set_status(404) 382 | self.write(message(404, 'Failed to fetch log for workflow \"%s\"' % id)) 383 | 384 | 385 | 386 | class WorkflowDownloadHandler(tornado.web.StaticFileHandler): 387 | 388 | def parse_url_path(self, id): 389 | # provide output file 
if path is specified, otherwise output data archive 390 | filename_default = '%s-output.tar.gz' % id 391 | filename = self.get_query_argument('path', filename_default) 392 | 393 | self.set_header('content-disposition', 'attachment; filename=\"%s\"' % filename) 394 | return os.path.join(id, filename) 395 | 396 | 397 | 398 | class TaskQueryHandler(tornado.web.RequestHandler): 399 | 400 | async def get(self): 401 | page = int(self.get_query_argument('page', 0)) 402 | page_size = int(self.get_query_argument('page_size', 100)) 403 | 404 | db = self.settings['db'] 405 | tasks = await db.task_query(page, page_size) 406 | 407 | self.set_status(200) 408 | self.set_header('content-type', 'application/json') 409 | self.write(tornado.escape.json_encode(tasks)) 410 | 411 | async def post(self): 412 | db = self.settings['db'] 413 | 414 | # make sure request body is valid 415 | try: 416 | task = tornado.escape.json_decode(self.request.body) 417 | except json.JSONDecodeError: 418 | self.set_status(422) 419 | self.write(message(422, 'Ill-formatted JSON')) 420 | return 421 | 422 | try: 423 | # append id to task 424 | task['_id'] = str(bson.ObjectId()) 425 | 426 | # extract input features for task 427 | if task['event'] == 'process_completed': 428 | # load execution log 429 | filenames = ['.command.log', '.command.out', '.command.err'] 430 | filenames = [os.path.join(task['trace']['workdir'], filename) for filename in filenames] 431 | files = [open(filename) for filename in filenames if os.path.exists(filename)] 432 | lines = [line.strip() for f in files for line in f] 433 | 434 | # parse input features from trace directives 435 | PREFIX = '#TRACE' 436 | lines = [line[len(PREFIX):] for line in lines if line.startswith(PREFIX)] 437 | items = [line.split('=') for line in lines] 438 | conditions = {k.strip(): v.strip() for k, v in items} 439 | 440 | # append input features to task trace 441 | task['trace'] = {**task['trace'], **conditions} 442 | 443 | # save task 444 | await db.task_create(task) 445 | 446 | # update workflow status on completed event 447 | if task['event'] == 'completed': 448 | # get workflow 449 | workflow_id = task['runName'].split('-')[1] 450 | workflow = await db.workflow_get(workflow_id) 451 | 452 | # update workflow status 453 | success = task['metadata']['workflow']['success'] 454 | if success: 455 | workflow['status'] = 'completed' 456 | else: 457 | workflow['status'] = 'failed' 458 | 459 | await db.workflow_update(workflow['_id'], workflow) 460 | 461 | self.set_status(200) 462 | self.set_header('content-type', 'application/json') 463 | self.write(tornado.escape.json_encode({ '_id': task['_id'] })) 464 | except: 465 | self.set_status(404) 466 | self.write(message(404, 'Failed to save task')) 467 | 468 | 469 | 470 | class TaskLogHandler(tornado.web.RequestHandler): 471 | 472 | async def get(self, id): 473 | db = self.settings['db'] 474 | 475 | try: 476 | # get workflow 477 | task = await db.task_get(id) 478 | workdir = task['trace']['workdir'] 479 | 480 | # construct response data 481 | data = { 482 | '_id': id, 483 | 'out': '', 484 | 'err': '' 485 | } 486 | 487 | # append log files if they exist 488 | out_file = os.path.join(workdir, '.command.out') 489 | err_file = os.path.join(workdir, '.command.err') 490 | 491 | if os.path.exists(out_file): 492 | f = open(out_file) 493 | data['out'] = ''.join(f.readlines()) 494 | 495 | if os.path.exists(err_file): 496 | f = open(err_file) 497 | data['err'] = ''.join(f.readlines()) 498 | 499 | self.set_status(200) 500 | self.set_header('content-type', 
'application/json') 501 | self.write(tornado.escape.json_encode(data)) 502 | except: 503 | self.set_status(404) 504 | self.write(message(404, 'Failed to fetch log for workflow \"%s\"' % id)) 505 | 506 | 507 | 508 | class TaskQueryPipelinesHandler(tornado.web.RequestHandler): 509 | 510 | async def get(self): 511 | db = self.settings['db'] 512 | 513 | try: 514 | # query pipelines from database 515 | pipelines = await db.task_query_pipelines() 516 | 517 | self.set_status(200) 518 | self.set_header('content-type', 'application/json') 519 | self.write(tornado.escape.json_encode(pipelines)) 520 | except Exception as e: 521 | self.set_status(404) 522 | self.write(message(404, 'Failed to perform query')) 523 | raise e 524 | 525 | 526 | 527 | class TaskQueryPipelineHandler(tornado.web.RequestHandler): 528 | 529 | async def get(self, pipeline): 530 | db = self.settings['db'] 531 | 532 | try: 533 | # query tasks from database 534 | pipeline = pipeline.lower() 535 | tasks = await db.task_query_pipeline(pipeline) 536 | tasks = [task['trace'] for task in tasks] 537 | 538 | # separate tasks into dataframes by process 539 | process_names = list(set([task['process'] for task in tasks])) 540 | dfs = {} 541 | 542 | for process in process_names: 543 | dfs[process] = [task for task in tasks if task['process'] == process] 544 | 545 | self.set_status(200) 546 | self.set_header('content-type', 'application/json') 547 | self.write(tornado.escape.json_encode(dfs)) 548 | except Exception as e: 549 | self.set_status(404) 550 | self.write(message(404, 'Failed to perform query')) 551 | raise e 552 | 553 | 554 | 555 | class TaskArchiveHandler(tornado.web.RequestHandler): 556 | 557 | async def get(self, pipeline): 558 | db = self.settings['db'] 559 | 560 | try: 561 | # query tasks from database 562 | pipeline = pipeline.lower() 563 | tasks = await db.task_query_pipeline(pipeline) 564 | tasks = [task['trace'] for task in tasks] 565 | 566 | # separate tasks into dataframes by process 567 | process_names = list(set([task['process'] for task in tasks])) 568 | dfs = {} 569 | 570 | for process in process_names: 571 | dfs[process] = pd.DataFrame([task for task in tasks if task['process'] == process]) 572 | 573 | # change to trace directory 574 | os.chdir(env.TRACE_DIR) 575 | 576 | # save dataframes to csv files 577 | for process in process_names: 578 | filename = 'trace.%s.txt' % (process) 579 | dfs[process].to_csv(filename, sep='\t', index=False) 580 | 581 | # create zip archive of trace files 582 | zipfile = 'trace.%s.zip' % (pipeline.replace('/', '__')) 583 | files = ['trace.%s.txt' % (process) for process in process_names] 584 | 585 | subprocess.run(['zip', zipfile] + files, check=True) 586 | subprocess.run(['rm', '-f'] + files, check=True) 587 | 588 | # return to working directory 589 | os.chdir('..') 590 | 591 | self.set_status(200) 592 | self.write(message(200, 'Archive was created')) 593 | except Exception as e: 594 | self.set_status(404) 595 | self.write(message(404, 'Failed to create archive')) 596 | raise e 597 | 598 | 599 | 600 | class TaskArchiveDownloadHandler(tornado.web.StaticFileHandler): 601 | 602 | def parse_url_path(self, pipeline): 603 | # get filename of trace archive 604 | filename = 'trace.%s.zip' % (pipeline.replace('/', '__')) 605 | 606 | self.set_header('content-disposition', 'attachment; filename=\"%s\"' % filename) 607 | return filename 608 | 609 | 610 | 611 | class TaskVisualizeHandler(tornado.web.RequestHandler): 612 | 613 | async def post(self): 614 | db = self.settings['db'] 615 | 616 | try: 617 
| # parse request body 618 | data = tornado.escape.json_decode(self.request.body) 619 | 620 | # query task dataset from database 621 | pipeline = data['pipeline'].lower() 622 | tasks = await db.task_query_pipeline(pipeline) 623 | tasks = [task['trace'] for task in tasks] 624 | tasks_process = [task for task in tasks if task['process'] == data['process']] 625 | 626 | df = pd.DataFrame(tasks_process) 627 | 628 | # prepare visualizer args 629 | args = data['args'] 630 | args['plot_name'] = str(bson.ObjectId()) 631 | 632 | if args['selectors'] == '': 633 | args['selectors'] = [] 634 | else: 635 | args['selectors'] = args['selectors'].split(' ') 636 | 637 | # append columns from merge process if specified 638 | if 'merge_process' in args: 639 | # load merge data 640 | tasks_merge = [task for task in tasks if task['process'] == args['merge_process']] 641 | df_merge = pd.DataFrame(tasks_merge) 642 | 643 | # remove duplicate columns 644 | dupe_columns = set(df.columns).intersection(df_merge.columns) 645 | dupe_columns.remove(args['merge_key']) 646 | df_merge.drop(columns=dupe_columns, inplace=True) 647 | 648 | # append merge columns to data 649 | df = df.merge(df_merge, on=args['merge_key'], how='left', copy=False) 650 | 651 | # create visualization 652 | outfile = Visualizer.visualize(df, args) 653 | 654 | # encode image file into base64 655 | with open(outfile, 'rb') as f: 656 | image_data = base64.b64encode(f.read()).decode('utf-8') 657 | 658 | self.set_status(200) 659 | self.set_header('content-type', 'application/json') 660 | self.write(tornado.escape.json_encode(image_data)) 661 | except Exception as e: 662 | self.set_status(404) 663 | self.write(message(404, 'Failed to visualize data')) 664 | raise e 665 | 666 | 667 | 668 | class TaskEditHandler(tornado.web.RequestHandler): 669 | 670 | async def get(self, id): 671 | db = self.settings['db'] 672 | 673 | try: 674 | task = await db.task_get(id) 675 | 676 | self.set_status(200) 677 | self.set_header('content-type', 'application/json') 678 | self.write(tornado.escape.json_encode(task)) 679 | except: 680 | self.set_status(404) 681 | self.write(message(404, 'Failed to get task \"%s\"' % id)) 682 | 683 | 684 | 685 | class ModelTrainHandler(tornado.web.RequestHandler): 686 | 687 | async def post(self): 688 | db = self.settings['db'] 689 | 690 | try: 691 | # parse request body 692 | data = tornado.escape.json_decode(self.request.body) 693 | 694 | # query task dataset from database 695 | pipeline = data['pipeline'].lower() 696 | tasks = await db.task_query_pipeline(pipeline) 697 | tasks = [task['trace'] for task in tasks] 698 | tasks_process = [task for task in tasks if task['process'] == data['process']] 699 | 700 | df = pd.DataFrame(tasks_process) 701 | 702 | # prepare training args 703 | args = data['args'] 704 | args['hidden_layer_sizes'] = [int(v) for v in args['hidden_layer_sizes'].split(' ')] 705 | args['model_name'] = '%s.%s.%s' % (pipeline.replace('/', '__'), data['process'], args['target']) 706 | 707 | if args['selectors'] == '': 708 | args['selectors'] = [] 709 | else: 710 | args['selectors'] = args['selectors'].split(' ') 711 | 712 | # append columns from merge process if specified 713 | if args['merge_process'] != None: 714 | # load merge data 715 | tasks_merge = [task for task in tasks if task['process'] == args['merge_process']] 716 | df_merge = pd.DataFrame(tasks_merge) 717 | 718 | # remove duplicate columns 719 | dupe_columns = set(df.columns).intersection(df_merge.columns) 720 | dupe_columns.remove(args['merge_key']) 721 | 
df_merge.drop(columns=dupe_columns, inplace=True) 722 | 723 | # append merge columns to data 724 | df = df.merge(df_merge, on=args['merge_key'], how='left', copy=False) 725 | 726 | # train model 727 | results = Model.train(df, args) 728 | 729 | # visualize training results 730 | df = pd.DataFrame() 731 | df['y_true'] = results['y_true'] 732 | df['y_pred'] = results['y_pred'] 733 | 734 | outfile = Visualizer.visualize(df, { 735 | 'xaxis': 'y_true', 736 | 'yaxis': 'y_pred', 737 | 'plot_name': str(bson.ObjectId()) 738 | }) 739 | 740 | # encode image file into base64 741 | with open(outfile, 'rb') as f: 742 | results['scatterplot'] = base64.b64encode(f.read()).decode('utf-8') 743 | 744 | # remove extra fields from results 745 | del results['y_true'] 746 | del results['y_pred'] 747 | 748 | self.set_status(200) 749 | self.set_header('content-type', 'application/json') 750 | self.write(tornado.escape.json_encode(results)) 751 | except Exception as e: 752 | self.set_status(404) 753 | self.write(message(404, 'Failed to train model')) 754 | raise e 755 | 756 | 757 | 758 | class ModelConfigHandler(tornado.web.RequestHandler): 759 | 760 | async def get(self): 761 | try: 762 | # parse request body 763 | pipeline = self.get_argument('pipeline', default=None) 764 | process = self.get_argument('process', default=None) 765 | target = self.get_argument('target', default=None) 766 | 767 | # get model config file 768 | filename = '%s/%s.%s.%s.json' % (env.MODELS_DIR, pipeline.lower().replace('/', '__'), process, target) 769 | 770 | with open(filename, 'r') as f: 771 | config = json.load(f) 772 | 773 | self.set_status(200) 774 | self.set_header('content-type', 'application/json') 775 | self.write(tornado.escape.json_encode(config)) 776 | except Exception as e: 777 | self.set_status(404) 778 | self.write(message(404, 'Failed to get model config')) 779 | raise e 780 | 781 | 782 | 783 | class ModelPredictHandler(tornado.web.RequestHandler): 784 | 785 | async def post(self): 786 | try: 787 | # parse request body 788 | data = tornado.escape.json_decode(self.request.body) 789 | data['pipeline'] = data['pipeline'].lower() 790 | data['model_name'] = '%s.%s.%s' % (data['pipeline'].replace('/', '__'), data['process'], data['target']) 791 | 792 | # perform model prediction 793 | results = Model.predict(data['model_name'], data['inputs']) 794 | 795 | self.set_status(200) 796 | self.set_header('content-type', 'application/json') 797 | self.write(tornado.escape.json_encode(results)) 798 | except Exception as e: 799 | self.set_status(404) 800 | self.write(message(404, 'Failed to perform model prediction')) 801 | raise e 802 | 803 | 804 | 805 | if __name__ == '__main__': 806 | # parse command-line options 807 | tornado.options.define('backend', default='mongo', help='Database backend to use (file or mongo)') 808 | tornado.options.define('url-file', default='db.pkl', help='database file for file backend') 809 | tornado.options.define('url-mongo', default='localhost', help='mongodb service url for mongo backend') 810 | tornado.options.define('np', default=1, help='number of server processes') 811 | tornado.options.define('port', default=8080) 812 | tornado.options.parse_command_line() 813 | 814 | # initialize auxiliary directories 815 | os.makedirs(env.MODELS_DIR, exist_ok=True) 816 | os.makedirs(env.TRACE_DIR, exist_ok=True) 817 | os.makedirs(env.WORKFLOWS_DIR, exist_ok=True) 818 | 819 | # initialize api endpoints 820 | app = tornado.web.Application([ 821 | (r'/api/workflows', WorkflowQueryHandler), 822 | (r'/api/workflows/0', 
WorkflowCreateHandler), 823 | (r'/api/workflows/([a-zA-Z0-9-]+)', WorkflowEditHandler), 824 | (r'/api/workflows/([a-zA-Z0-9-]+)/upload', WorkflowUploadHandler), 825 | (r'/api/workflows/([a-zA-Z0-9-]+)/launch', WorkflowLaunchHandler), 826 | (r'/api/workflows/([a-zA-Z0-9-]+)/resume', WorkflowResumeHandler), 827 | (r'/api/workflows/([a-zA-Z0-9-]+)/cancel', WorkflowCancelHandler), 828 | (r'/api/workflows/([a-zA-Z0-9-]+)/log', WorkflowLogHandler), 829 | (r'/api/workflows/([a-zA-Z0-9-]+)/download', WorkflowDownloadHandler, dict(path=env.WORKFLOWS_DIR)), 830 | (r'/api/tasks', TaskQueryHandler), 831 | (r'/api/tasks/([a-zA-Z0-9-]+)/log', TaskLogHandler), 832 | (r'/api/tasks/pipelines', TaskQueryPipelinesHandler), 833 | (r'/api/tasks/pipelines/(.+)', TaskQueryPipelineHandler), 834 | (r'/api/tasks/archive/(.+)/download', TaskArchiveDownloadHandler, dict(path=env.TRACE_DIR)), 835 | (r'/api/tasks/archive/(.+)', TaskArchiveHandler), 836 | (r'/api/tasks/visualize', TaskVisualizeHandler), 837 | (r'/api/tasks/([a-zA-Z0-9-]+)', TaskEditHandler), 838 | (r'/api/model/train', ModelTrainHandler), 839 | (r'/api/model/config', ModelConfigHandler), 840 | (r'/api/model/predict', ModelPredictHandler), 841 | (r'/(.*)', tornado.web.StaticFileHandler, dict(path='./client', default_filename='index.html')) 842 | ]) 843 | 844 | try: 845 | # spawn server processes 846 | server = tornado.httpserver.HTTPServer(app, max_buffer_size=1024 ** 3) 847 | server.bind(tornado.options.options.port) 848 | server.start(tornado.options.options.np) 849 | 850 | # connect to database 851 | if tornado.options.options.backend == 'file': 852 | app.settings['db'] = backend.FileBackend(tornado.options.options.url_file) 853 | 854 | elif tornado.options.options.backend == 'mongo': 855 | app.settings['db'] = backend.MongoBackend(tornado.options.options.url_mongo) 856 | 857 | else: 858 | raise KeyError('Backend must be either \'json\' or \'mongo\'') 859 | 860 | # start the event loop 861 | print('The API is listening on http://0.0.0.0:%d' % (tornado.options.options.port), flush=True) 862 | tornado.ioloop.IOLoop.current().start() 863 | 864 | except KeyboardInterrupt: 865 | tornado.ioloop.IOLoop.current().stop() 866 | -------------------------------------------------------------------------------- /bin/visualizer.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | 6 | 7 | 8 | def select_rows_by_values(df, column, values): 9 | return pd.concat([df[df[column].astype(str) == v] for v in values]) 10 | 11 | 12 | 13 | def is_continuous(df, column): 14 | return column != None and df[column].dtype.kind in 'biufcmM' 15 | 16 | 17 | 18 | def is_discrete(df, column): 19 | return column != None and df[column].dtype.kind in 'OSUV' 20 | 21 | 22 | 23 | def contingency_table(x, y, data, **kwargs): 24 | # compute indices for categorical variables 25 | x_values = sorted(list(set(x))) 26 | y_values = sorted(list(set(y))) 27 | x_idx = [x_values.index(x_i) for x_i in x] 28 | y_idx = [y_values.index(y_i) for y_i in y] 29 | 30 | # create contingency table 31 | ct = pd.DataFrame( 32 | np.zeros((len(y_values), len(x_values))), 33 | index=y_values, 34 | columns=x_values, 35 | dtype=np.int32) 36 | 37 | for x_i, y_i in zip(x_idx, y_idx): 38 | ct.iloc[y_i, x_i] += 1 39 | 40 | # plot contingency table 41 | sns.heatmap(ct, annot=True, fmt='d', cbar=False, square=True, **kwargs) 42 | 43 | 44 | 45 | def visualize(data, args): 46 | defaults = 
{ 47 | 'plot_type': None, 48 | 'yaxis': None, 49 | 'row': None, 50 | 'col': None, 51 | 'hue': None, 52 | 'selectors': [], 53 | 'sharex': False, 54 | 'sharey': False, 55 | 'height': 3, 56 | 'aspect': 1, 57 | 'color': None, 58 | 'palette': None, 59 | 'xscale': None, 60 | 'yscale': None, 61 | 'rotate_xticklabels': False, 62 | 'rotate_yticklabels': False 63 | } 64 | 65 | args = {**defaults, **args} 66 | 67 | # prepare axis columns in dataframe 68 | axes = [ 69 | args['xaxis'], 70 | args['yaxis'], 71 | args['row'], 72 | args['col'], 73 | args['hue'] 74 | ] 75 | 76 | for column in axes: 77 | # skip columns which were not specified 78 | if column == None: 79 | continue 80 | 81 | # remove rows which have missing values in column 82 | data = data[~data[column].isna()] 83 | 84 | # apply selectorss to dataframe 85 | for selector in args['selectors']: 86 | # parse column and selected values 87 | column, values = selector.split('=') 88 | values = values.split(',') 89 | 90 | # select rows from dataframe 91 | if values != None and len(values) > 0: 92 | data = select_rows_by_values(data, column, values) 93 | 94 | if len(data.index) == 0: 95 | raise RuntimeError('error: no data to visualize') 96 | 97 | # sort data by row, col, and hue values 98 | if args['row'] != None: 99 | data.sort_values(by=args['row'], inplace=True, kind='mergesort') 100 | 101 | if args['col'] != None: 102 | data.sort_values(by=args['col'], inplace=True, kind='mergesort') 103 | 104 | if args['hue'] != None: 105 | data.sort_values(by=args['hue'], inplace=True, kind='mergesort') 106 | 107 | # create a facet grid for plotting 108 | g = sns.FacetGrid( 109 | data, 110 | row=args['row'], 111 | col=args['col'], 112 | sharex=args['sharex'], 113 | sharey=args['sharey'], 114 | height=args['height'], 115 | aspect=args['aspect'], 116 | margin_titles=True) 117 | 118 | # determine plot type if not specified 119 | if args['plot_type'] == None: 120 | # if x is continuous, use histogram 121 | if is_continuous(data, args['xaxis']) and args['yaxis'] == None: 122 | args['plot_type'] = 'hist' 123 | 124 | # if x is discrete, use count plot 125 | elif is_discrete(data, args['xaxis']) and args['yaxis'] == None: 126 | args['plot_type'] = 'count' 127 | 128 | # if x and y are continuous, use scatter plot 129 | elif is_continuous(data, args['xaxis']) and is_continuous(data, args['yaxis']): 130 | args['plot_type'] = 'scatter' 131 | 132 | # if x and y are discrete, use contingency table 133 | elif is_discrete(data, args['xaxis']) and is_discrete(data, args['yaxis']): 134 | args['plot_type'] = 'ct' 135 | 136 | # if x is discrete and y is continuous, use bar plot 137 | elif is_discrete(data, args['xaxis']) and is_continuous(data, args['yaxis']): 138 | args['plot_type'] = 'bar' 139 | 140 | # otherwise throw an error 141 | else: 142 | raise RuntimeError('error: could not find a plotting method for the given axes') 143 | 144 | # create order of x values for discrete plots 145 | # unless y-axis sorting is enabled (so as not to override it) 146 | if is_discrete(data, args['xaxis']): 147 | x_values = sorted(list(set(data[args['xaxis']]))) 148 | else: 149 | x_values = None 150 | 151 | # create plot 152 | if args['plot_type'] == 'hist': 153 | g.map( 154 | sns.histplot, 155 | args['xaxis'], 156 | color=args['color']) 157 | 158 | elif args['plot_type'] == 'count': 159 | g.map( 160 | sns.countplot, 161 | args['xaxis'], 162 | hue=args['hue'], 163 | color=args['color'], 164 | palette=args['palette']) 165 | 166 | elif args['plot_type'] == 'scatter': 167 | g = g.map( 168 | 
sns.scatterplot, 169 | args['xaxis'], 170 | args['yaxis'], 171 | hue=args['hue'], 172 | data=data, 173 | color=args['color']) 174 | 175 | if args['hue'] != None: 176 | g.add_legend() 177 | 178 | elif args['plot_type'] == 'ct': 179 | g = g.map( 180 | contingency_table, 181 | args['xaxis'], 182 | args['yaxis'], 183 | data=data, 184 | color=args['color']) 185 | 186 | elif args['plot_type'] == 'bar': 187 | g = g.map( 188 | sns.barplot, 189 | args['xaxis'], 190 | args['yaxis'], 191 | hue=args['hue'], 192 | data=data, 193 | ci=68, 194 | color=args['color'], 195 | palette=args['palette'], 196 | order=x_values) 197 | 198 | if args['hue'] != None: 199 | g.add_legend() 200 | 201 | elif args['plot_type'] == 'point': 202 | g = g.map( 203 | sns.pointplot, 204 | args['xaxis'], 205 | args['yaxis'], 206 | hue=args['hue'], 207 | data=data, 208 | ci=68, 209 | capsize=0.1, 210 | color=args['color'], 211 | palette=args['palette'], 212 | markers='x', 213 | linestyles='--', 214 | order=x_values) 215 | 216 | if args['hue'] != None: 217 | g.add_legend() 218 | 219 | # set x-axis scale if specified 220 | if args['xscale'] != None: 221 | g.set(xscale=args['xscale']) 222 | 223 | # set y-axis scale if specified 224 | if args['yscale'] != None: 225 | g.set(yscale=args['yscale']) 226 | 227 | # rotate x-axis tick labels if specified 228 | if args['rotate_xticklabels']: 229 | plt.xticks(rotation=45) 230 | 231 | # rotate y-axis tick labels if specified 232 | if args['rotate_yticklabels']: 233 | plt.yticks(rotation=45) 234 | 235 | # disable x-axis ticks if there are too many categories 236 | if is_discrete(data, args['xaxis']) and len(set(data[args['xaxis']])) >= 100: 237 | plt.xticks([]) 238 | 239 | # save output figure 240 | outfile = '/tmp/%s.png' % (args['plot_name']) 241 | plt.savefig(outfile) 242 | plt.close() 243 | 244 | return outfile -------------------------------------------------------------------------------- /bin/workflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | import os 5 | import signal 6 | import subprocess 7 | 8 | import env 9 | 10 | 11 | 12 | def get_run_name(workflow): 13 | return 'workflow-%s-%04d' % (workflow['_id'], workflow['attempts']) 14 | 15 | 16 | 17 | def run_workflow(workflow, work_dir, resume): 18 | # save current directory 19 | prev_dir = os.getcwd() 20 | 21 | # change to workflow directory 22 | os.chdir(work_dir) 23 | 24 | # prepare command line arguments 25 | run_name = get_run_name(workflow) 26 | 27 | if env.NXF_EXECUTOR == 'k8s': 28 | args = [ 29 | 'nextflow', 30 | '-log', os.path.join(workflow['output_dir'], 'nextflow.log'), 31 | 'kuberun', 32 | workflow['pipeline'], 33 | '-ansi-log', 'false', 34 | '-latest', 35 | '-name', run_name, 36 | '-profile', workflow['profiles'], 37 | '-revision', workflow['revision'], 38 | '-volume-mount', env.PVC_NAME 39 | ] 40 | 41 | elif env.NXF_EXECUTOR == 'local': 42 | args = [ 43 | 'nextflow', 44 | '-log', os.path.join(workflow['output_dir'], 'nextflow.log'), 45 | 'run', 46 | workflow['pipeline'], 47 | '-ansi-log', 'false', 48 | '-latest', 49 | '-name', run_name, 50 | '-profile', workflow['profiles'], 51 | '-revision', workflow['revision'] 52 | ] 53 | 54 | elif env.NXF_EXECUTOR == 'pbspro': 55 | args = [ 56 | 'nextflow', 57 | '-log', os.path.join(workflow['output_dir'], 'nextflow.log'), 58 | 'run', 59 | workflow['pipeline'], 60 | '-ansi-log', 'false', 61 | '-latest', 62 | '-name', run_name, 63 | '-profile', workflow['profiles'], 64 | '-revision', 
workflow['revision'] 65 | ] 66 | 67 | # add params file if specified 68 | if workflow['params_format'] and workflow['params_data']: 69 | params_filename = 'params.%s' % (workflow['params_format']) 70 | params_file = open(params_filename, 'w') 71 | params_file.write(workflow['params_data']) 72 | params_file.close() 73 | 74 | args += ['-params-file', params_filename] 75 | 76 | # add resume option if specified 77 | if resume: 78 | args += ['-resume'] 79 | 80 | # launch workflow asynchronously 81 | proc = subprocess.Popen( 82 | args, 83 | stdout=open('.workflow.log', 'w'), 84 | stderr=subprocess.STDOUT 85 | ) 86 | 87 | # return to original directory 88 | os.chdir(prev_dir) 89 | 90 | return proc 91 | 92 | 93 | 94 | def save_output(workflow, output_dir): 95 | return subprocess.Popen( 96 | ['scripts/kube-save.sh', workflow['_id'], output_dir], 97 | stdout=subprocess.PIPE, 98 | stderr=subprocess.STDOUT 99 | ) 100 | 101 | 102 | 103 | async def set_property(db, workflow, key, value): 104 | workflow[key] = value 105 | await db.workflow_update(workflow['_id'], workflow) 106 | 107 | 108 | 109 | async def launch_async(db, workflow, resume): 110 | # re-initialize database backend 111 | db.initialize() 112 | 113 | # start workflow 114 | work_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id']) 115 | proc = run_workflow(workflow, work_dir, resume) 116 | proc_pid = proc.pid 117 | 118 | print('%d: saving workflow pid...' % (proc_pid)) 119 | 120 | # save workflow pid 121 | await set_property(db, workflow, 'pid', proc.pid) 122 | 123 | print('%d: waiting for workflow to finish...' % (proc_pid)) 124 | 125 | # wait for workflow to complete 126 | if proc.wait() == 0: 127 | print('%d: workflow completed' % (proc_pid)) 128 | await set_property(db, workflow, 'status', 'completed') 129 | else: 130 | print('%d: workflow failed' % (proc_pid)) 131 | await set_property(db, workflow, 'status', 'failed') 132 | return 133 | 134 | print('%d: saving output data...' % (proc_pid)) 135 | 136 | # save output data 137 | output_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id'], workflow['output_dir']) 138 | proc = save_output(workflow, output_dir) 139 | 140 | proc_out, _ = proc.communicate() 141 | print(proc_out.decode('utf-8')) 142 | 143 | if proc.wait() == 0: 144 | print('%d: save output data completed' % (proc_pid)) 145 | else: 146 | print('%d: save output data failed' % (proc_pid)) 147 | 148 | 149 | 150 | def launch(db, workflow, resume): 151 | asyncio.run(launch_async(db, workflow, resume)) 152 | 153 | 154 | 155 | def cancel(workflow): 156 | # terminate child process 157 | if workflow['pid'] != -1: 158 | try: 159 | os.kill(workflow['pid'], signal.SIGINT) 160 | except ProcessLookupError: 161 | pass 162 | 163 | # delete pods if relevant 164 | if env.NXF_EXECUTOR == 'k8s': 165 | proc = subprocess.Popen( 166 | ['scripts/kube-cancel.sh', get_run_name(workflow)], 167 | stdout=subprocess.PIPE, 168 | stderr=subprocess.STDOUT 169 | ) 170 | proc_out, _ = proc.communicate() 171 | print(proc_out.decode('utf-8')) 172 | -------------------------------------------------------------------------------- /cli/cancel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Cancel a workflow instance on a nextflow server. 
3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # cancel a workflow instance 14 | curl -s -X POST ${URL}/api/workflows/${ID}/cancel 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Create a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | PIPELINE="$2" 12 | 13 | # create a workflow instance 14 | curl -s \ 15 | -X POST \ 16 | -d "{\"pipeline\":\"${PIPELINE}\"}" \ 17 | ${URL}/api/workflows/0 18 | 19 | echo 20 | -------------------------------------------------------------------------------- /cli/delete.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Delete a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # delete a workflow instance 14 | curl -s -X DELETE ${URL}/api/workflows/${ID} 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download output data for a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # download output data for a workflow instance 14 | curl -s -o "${ID}-output.tar.gz" ${URL}/api/workflows/${ID}/download 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/get.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Get a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # get a workflow instance 14 | curl -s -X GET ${URL}/api/workflows/${ID} 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Launch a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # launch a workflow instance 14 | curl -s -X POST ${URL}/api/workflows/${ID}/launch 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Get the log of a workflow instance on a nextflow server. 
3 | 4 | # parse command-line arguments 5 | if [[ $# != 2 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | 13 | # get the log of a workflow instance 14 | curl -s -X GET ${URL}/api/workflows/${ID}/log 15 | 16 | echo 17 | -------------------------------------------------------------------------------- /cli/query.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # List all workflow instances on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 1 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | 12 | # list all workflow instances 13 | curl -s -X GET ${URL}/api/workflows 14 | 15 | echo 16 | -------------------------------------------------------------------------------- /cli/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Upload input data for a workflow instance on a nextflow server. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 3 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | URL="$1" 11 | ID="$2" 12 | FILENAME="$3" 13 | 14 | # upload data to a workflow instance 15 | curl -s \ 16 | -F "filename=$(basename FILENAME)" \ 17 | -F "body=@${FILENAME}" \ 18 | ${URL}/api/workflows/${ID}/upload 19 | 20 | echo 21 | -------------------------------------------------------------------------------- /client/app.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const app = angular.module('app', [ 4 | 'ngRoute', 5 | 'angularFileUpload' 6 | ]) 7 | 8 | 9 | 10 | app.config(['$compileProvider', function($compileProvider) { 11 | $compileProvider.debugInfoEnabled(false) 12 | }]) 13 | 14 | 15 | 16 | app.config(['$routeProvider', function($routeProvider) { 17 | $routeProvider 18 | .when('/', { redirectTo: '/workflows' }) 19 | .when('/workflows', { 20 | templateUrl: 'views/workflows.html', 21 | controller: 'WorkflowsCtrl' 22 | }) 23 | .when('/workflows/:id', { 24 | templateUrl: 'views/workflow.html', 25 | controller: 'WorkflowCtrl' 26 | }) 27 | .when('/tasks', { 28 | templateUrl: 'views/tasks.html', 29 | controller: 'TasksCtrl' 30 | }) 31 | .when('/tasks/:id', { 32 | templateUrl: 'views/task.html', 33 | controller: 'TaskCtrl' 34 | }) 35 | .when('/visualizer', { 36 | templateUrl: 'views/visualizer.html', 37 | controller: 'VisualizerCtrl' 38 | }) 39 | .when('/model', { 40 | templateUrl: 'views/model.html', 41 | controller: 'ModelCtrl' 42 | }) 43 | .otherwise('/') 44 | }]) 45 | 46 | 47 | 48 | app.service('alert', ['$interval', function($interval) { 49 | this.alerts = [] 50 | 51 | const self = this 52 | let count = 0 53 | 54 | const addAlert = function(type, header, message) { 55 | let id = count 56 | let promise = $interval(function() { 57 | let index = self.alerts.findIndex(function(alert) { 58 | return (alert.id === id) 59 | }) 60 | 61 | self.alerts.splice(index, 1) 62 | }, 10000, 1) 63 | 64 | self.alerts.push({ 65 | id: id, 66 | type: type, 67 | header: header, 68 | message: message, 69 | promise: promise 70 | }) 71 | count++ 72 | } 73 | 74 | this.success = function(message) { 75 | addAlert('success', null, message) 76 | } 77 | 78 | this.info = function(message) { 79 | addAlert('info', null, message) 80 | } 81 | 82 | this.warning = function(message) { 83 | addAlert('warning', null, message) 84 | } 85 | 86 | this.error = function(message) { 87 | addAlert('danger', 'Error: ', message) 88 | } 89 | 90 | this.remove = function(index) { 91 | 
$interval.cancel(self.alerts[index].promise) 92 | 93 | self.alerts.splice(index, 1) 94 | } 95 | }]) 96 | 97 | 98 | 99 | app.service('api', ['$http', '$q', function($http, $q) { 100 | function httpRequest(method, url, params, data) { 101 | return $http({ 102 | method: method, 103 | url: window.location.pathname + url, 104 | params: params, 105 | data: data 106 | }).then(function(res) { 107 | return res.data 108 | }, function(res) { 109 | return $q.reject(res.data) 110 | }) 111 | } 112 | 113 | this.Workflow = {} 114 | 115 | this.Workflow.query = function(page) { 116 | return httpRequest('get', 'api/workflows', { page: page }) 117 | } 118 | 119 | this.Workflow.get = function(id) { 120 | return httpRequest('get', `api/workflows/${id}`) 121 | } 122 | 123 | this.Workflow.save = function(workflow) { 124 | return httpRequest('post', `api/workflows/${workflow._id}`, null, workflow) 125 | } 126 | 127 | this.Workflow.launch = function(id) { 128 | return httpRequest('post', `api/workflows/${id}/launch`) 129 | } 130 | 131 | this.Workflow.resume = function(id) { 132 | return httpRequest('post', `api/workflows/${id}/resume`) 133 | } 134 | 135 | this.Workflow.cancel = function(id) { 136 | return httpRequest('post', `api/workflows/${id}/cancel`) 137 | } 138 | 139 | this.Workflow.log = function(id) { 140 | return httpRequest('get', `api/workflows/${id}/log`) 141 | } 142 | 143 | this.Workflow.remove = function(id) { 144 | return httpRequest('delete', `api/workflows/${id}`) 145 | } 146 | 147 | this.Task = {} 148 | 149 | this.Task.query = function(page) { 150 | return httpRequest('get', 'api/tasks', { page: page }) 151 | } 152 | 153 | this.Task.query_pipelines = function() { 154 | return httpRequest('get', `api/tasks/pipelines`) 155 | } 156 | 157 | this.Task.query_pipeline = function(pipeline) { 158 | return httpRequest('get', `api/tasks/pipelines/${pipeline}`) 159 | } 160 | 161 | this.Task.archive = function(pipeline) { 162 | return httpRequest('get', `api/tasks/archive/${pipeline}`) 163 | } 164 | 165 | this.Task.get = function(id) { 166 | return httpRequest('get', `api/tasks/${id}`) 167 | } 168 | 169 | this.Task.log = function(id) { 170 | return httpRequest('get', `api/tasks/${id}/log`) 171 | } 172 | 173 | this.Task.visualize = function(pipeline, process, args) { 174 | return httpRequest('post', `api/tasks/visualize`, null, { 175 | pipeline, 176 | process, 177 | args 178 | }) 179 | } 180 | 181 | this.Model = {} 182 | 183 | this.Model.train = function(pipeline, process, args) { 184 | return httpRequest('post', `api/model/train`, null, { 185 | pipeline, 186 | process, 187 | args 188 | }) 189 | } 190 | 191 | this.Model.get_config = function(pipeline, process, target) { 192 | return httpRequest('get', `api/model/config`, { 193 | pipeline, 194 | process, 195 | target 196 | }) 197 | } 198 | 199 | this.Model.predict = function(pipeline, process, target, inputs) { 200 | return httpRequest('post', `api/model/predict`, null, { 201 | pipeline, 202 | process, 203 | target, 204 | inputs 205 | }) 206 | } 207 | }]) 208 | 209 | 210 | 211 | app.controller('MainCtrl', ['$scope', 'alert', function($scope, alert) { 212 | $scope.alert = alert 213 | }]) 214 | 215 | 216 | 217 | const STATUS_COLORS = { 218 | 'nascent': 'primary', 219 | 'running': 'warning', 220 | 'completed': 'success', 221 | 'failed': 'danger' 222 | } 223 | 224 | 225 | 226 | app.controller('WorkflowsCtrl', ['$scope', '$route', 'alert', 'api', function($scope, $route, alert, api) { 227 | $scope.STATUS_COLORS = STATUS_COLORS 228 | $scope.page = 0 229 | 
$scope.workflows = [] 230 | 231 | $scope.query = function(page) { 232 | api.Workflow.query(page) 233 | .then(function(workflows) { 234 | $scope.page = page 235 | $scope.workflows = workflows 236 | }, function() { 237 | alert.error('Failed to query workflow instances.') 238 | }) 239 | } 240 | 241 | $scope.delete = function(w) { 242 | if ( !confirm(`Are you sure you want to delete \"${w._id}\"?`) ) { 243 | return 244 | } 245 | 246 | api.Workflow.remove(w._id) 247 | .then(function() { 248 | alert.success('Workflow instance deleted.') 249 | $route.reload() 250 | }, function() { 251 | alert.error('Failed to delete workflow instance.') 252 | }) 253 | } 254 | 255 | // initialize 256 | $scope.query(0) 257 | }]) 258 | 259 | 260 | 261 | app.controller('WorkflowCtrl', ['$scope', '$interval', '$route', 'alert', 'api', 'FileUploader', function($scope, $interval, $route, alert, api, FileUploader) { 262 | $scope.STATUS_COLORS = STATUS_COLORS 263 | $scope.workflow = {} 264 | 265 | $scope.uploader = new FileUploader({ 266 | url: `${window.location.pathname}api/workflows/${$route.current.params.id}/upload` 267 | }) 268 | 269 | $scope.uploader.onCompleteAll = function() { 270 | alert.success('All input files uploaded.') 271 | $scope.uploading = false 272 | $route.reload() 273 | } 274 | 275 | $scope.uploader.onErrorItem = function() { 276 | alert.error('Failed to upload input files.') 277 | $scope.uploading = false 278 | } 279 | 280 | $scope.save = function(workflow) { 281 | api.Workflow.save(workflow) 282 | .then(function(res) { 283 | alert.success('Workflow instance saved.') 284 | $route.updateParams({ id: res._id }) 285 | }, function() { 286 | alert.error('Failed to save workflow instance.') 287 | }) 288 | } 289 | 290 | $scope.upload = function() { 291 | $scope.uploading = true 292 | $scope.uploader.uploadAll() 293 | } 294 | 295 | $scope.launch = function(id) { 296 | $scope.launching = true 297 | 298 | api.Workflow.launch(id) 299 | .then(function() { 300 | alert.success('Workflow instance launched.') 301 | $scope.workflow.status = '' 302 | $scope.workflow.log = '' 303 | $scope.launching = false 304 | $scope.fetchLog() 305 | }, function() { 306 | alert.error('Failed to launch workflow instance.') 307 | $scope.launching = false 308 | }) 309 | } 310 | 311 | $scope.resume = function(id) { 312 | $scope.resuming = true 313 | 314 | api.Workflow.resume(id) 315 | .then(function() { 316 | alert.success('Workflow instance resumed.') 317 | $scope.workflow.status = '' 318 | $scope.workflow.log = '' 319 | $scope.resuming = false 320 | $scope.fetchLog() 321 | }, function() { 322 | alert.error('Failed to resume workflow instance.') 323 | $scope.resuming = false 324 | }) 325 | } 326 | 327 | $scope.cancel = function(id) { 328 | $scope.cancelling = true 329 | 330 | api.Workflow.cancel(id) 331 | .then(function() { 332 | alert.success('Workflow instance canceled.') 333 | $scope.cancelling = false 334 | $route.reload() 335 | }, function() { 336 | alert.error('Failed to cancel workflow instance.') 337 | $scope.cancelling = false 338 | }) 339 | } 340 | 341 | $scope.fetchLog = function() { 342 | if ( $scope.intervalPromise ) { 343 | return 344 | } 345 | 346 | $scope.intervalPromise = $interval(function() { 347 | api.Workflow.log($scope.workflow._id) 348 | .then(function(res) { 349 | Object.assign($scope.workflow, res) 350 | 351 | if ( res.status !== 'running' ) { 352 | $interval.cancel($scope.intervalPromise) 353 | $scope.intervalPromise = undefined 354 | } 355 | }) 356 | }, 2000, -1) 357 | } 358 | 359 | $scope.$on('$destroy', 
function() { 360 | if ( angular.isDefined($scope.intervalPromise) ) { 361 | $interval.cancel($scope.intervalPromise) 362 | } 363 | }) 364 | 365 | // initialize 366 | api.Workflow.get($route.current.params.id) 367 | .then(function(workflow) { 368 | $scope.workflow = workflow 369 | 370 | if ( $scope.workflow._id !== '0' ) { 371 | $scope.fetchLog() 372 | } 373 | }, function() { 374 | alert.error('Failed to load workflow.') 375 | }) 376 | }]) 377 | 378 | 379 | 380 | app.controller('TasksCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) { 381 | $scope.page = 0 382 | $scope.tasks = [] 383 | 384 | $scope.query_pipelines = function() { 385 | api.Task.query_pipelines() 386 | .then(function(pipelines) { 387 | $scope.pipelines = pipelines 388 | }, function() { 389 | alert.error('Failed to query pipelines.') 390 | }) 391 | } 392 | 393 | $scope.query_tasks = function(page) { 394 | api.Task.query(page) 395 | .then(function(tasks) { 396 | $scope.page = page 397 | $scope.tasks = tasks 398 | }, function() { 399 | alert.error('Failed to query tasks.') 400 | }) 401 | } 402 | 403 | $scope.archive = function(pipeline) { 404 | $scope.archiving = true 405 | 406 | api.Task.archive(pipeline) 407 | .then(function() { 408 | $scope.archiving = false 409 | $scope.archive_success = true 410 | 411 | alert.success('Archive was created.') 412 | }, function() { 413 | $scope.archiving = false 414 | $scope.archive_success = false 415 | 416 | alert.error('Failed to create archive.') 417 | }) 418 | } 419 | 420 | // initialize 421 | $scope.query_pipelines() 422 | $scope.query_tasks(0) 423 | }]) 424 | 425 | 426 | 427 | app.controller('TaskCtrl', ['$scope', '$route', 'alert', 'api', function($scope, $route, alert, api) { 428 | $scope.task = {} 429 | $scope.task_out = '' 430 | $scope.task_err = '' 431 | 432 | $scope.fetchLog = function() { 433 | api.Task.log($route.current.params.id) 434 | .then(function(res) { 435 | $scope.task_out = res.out 436 | $scope.task_err = res.err 437 | }, function() { 438 | alert.error('Failed to fetch task logs.') 439 | }) 440 | } 441 | 442 | // initialize 443 | api.Task.get($route.current.params.id) 444 | .then(function(task) { 445 | $scope.task = task 446 | }, function() { 447 | alert.error('Failed to load task.') 448 | }) 449 | }]) 450 | 451 | 452 | 453 | app.controller('VisualizerCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) { 454 | $scope.args = { 455 | selectors: 'exit=0', 456 | height: 3, 457 | aspect: 1 458 | } 459 | $scope.columns = [] 460 | $scope.merge_columns = [] 461 | 462 | $scope.query_pipelines = function() { 463 | api.Task.query_pipelines() 464 | .then(function(pipelines) { 465 | $scope.pipelines = pipelines 466 | }, function() { 467 | alert.error('Failed to query pipelines.') 468 | }) 469 | } 470 | 471 | $scope.query_dataset = function(pipeline) { 472 | $scope.querying = true 473 | 474 | api.Task.query_pipeline(pipeline) 475 | .then(function(data) { 476 | let process_names = Object.keys(data) 477 | let process_columns = process_names.reduce((prev, process) => { 478 | let tasks = data[process] 479 | let columns = new Set(tasks.reduce((p, t) => p.concat(Object.keys(t)), [])) 480 | prev[process] = Array.from(columns) 481 | return prev 482 | }, {}) 483 | 484 | $scope.querying = false 485 | $scope.pipeline_data = data 486 | $scope.process_names = process_names 487 | $scope.process_columns = process_columns 488 | }, function() { 489 | $scope.querying = false 490 | alert.error('Failed to query pipeline tasks.') 491 | }) 492 | } 493 | 494 | $scope.update_columns 
= function(process_columns, process, merge_process) { 495 | let array1 = process ? process_columns[process] : [] 496 | let array2 = merge_process ? process_columns[merge_process] : [] 497 | 498 | $scope.columns = Array.from(new Set(array1.concat(array2))) 499 | $scope.merge_columns = array1.filter(value => array2.includes(value)); 500 | } 501 | 502 | $scope.visualize = function(pipeline, process, args) { 503 | $scope.visualizing = true 504 | 505 | api.Task.visualize(pipeline, process, args) 506 | .then(function(image_data) { 507 | $scope.visualizing = false 508 | $scope.visualize_success = true 509 | $scope.image_data = image_data 510 | alert.success('Visualiation was created.') 511 | }, function() { 512 | $scope.visualizing = false 513 | $scope.visualize_success = false 514 | alert.error('Failed to visualize data.') 515 | }) 516 | } 517 | 518 | // initialize 519 | $scope.query_pipelines() 520 | }]) 521 | 522 | 523 | 524 | app.controller('ModelCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) { 525 | $scope.args = { 526 | merge_process: null, 527 | inputs: [], 528 | target: null, 529 | scaler: 'maxabs', 530 | selectors: 'exit=0', 531 | hidden_layer_sizes: '128 128 128', 532 | epochs: 200 533 | } 534 | $scope.columns = [] 535 | $scope.merge_columns = [] 536 | 537 | $scope.train = {} 538 | $scope.predict = {} 539 | 540 | $scope.query_pipelines = function() { 541 | api.Task.query_pipelines() 542 | .then(function(pipelines) { 543 | $scope.pipelines = pipelines 544 | }, function() { 545 | alert.error('Failed to query pipelines.') 546 | }) 547 | } 548 | 549 | $scope.query_dataset = function(pipeline) { 550 | $scope.querying = true 551 | 552 | api.Task.query_pipeline(pipeline) 553 | .then(function(data) { 554 | let process_names = Object.keys(data) 555 | let process_columns = process_names.reduce((prev, process) => { 556 | let tasks = data[process] 557 | let columns = new Set(tasks.reduce((p, t) => p.concat(Object.keys(t)), [])) 558 | prev[process] = Array.from(columns) 559 | return prev 560 | }, {}) 561 | 562 | $scope.querying = false 563 | $scope.pipeline_data = data 564 | $scope.process_names = process_names 565 | $scope.process_columns = process_columns 566 | }, function() { 567 | $scope.querying = false 568 | alert.error('Failed to query pipeline tasks.') 569 | }) 570 | } 571 | 572 | $scope.update_columns = function(process_columns, process, merge_process) { 573 | let array1 = process ? process_columns[process] : [] 574 | let array2 = merge_process ? 
process_columns[merge_process] : [] 575 | 576 | $scope.columns = Array.from(new Set(array1.concat(array2))) 577 | $scope.merge_columns = array1.filter(value => array2.includes(value)); 578 | } 579 | 580 | $scope.train = function(pipeline, process, args) { 581 | $scope.training = true 582 | 583 | api.Model.train(pipeline, process, args) 584 | .then(function(results) { 585 | $scope.training = false 586 | $scope.train.results = results 587 | alert.success('Model was trained.') 588 | }, function() { 589 | $scope.training = false 590 | alert.error('Failed to train model.') 591 | }) 592 | } 593 | 594 | $scope.get_config = function(pipeline, process, target) { 595 | api.Model.get_config(pipeline, process, target) 596 | .then(function(config) { 597 | $scope.config = config 598 | $scope.predict.options = config.inputs 599 | $scope.predict.inputs = Object.keys(config.inputs).reduce((prev, input) => { 600 | prev[input] = null 601 | return prev 602 | }, {}) 603 | 604 | console.log($scope.predict) 605 | }, function() { 606 | alert.error('Failed to get model config.') 607 | }) 608 | } 609 | 610 | $scope.predict = function(pipeline, process, target, inputs) { 611 | $scope.predicting = true 612 | 613 | api.Model.predict(pipeline, process, target, inputs) 614 | .then(function(results) { 615 | $scope.predicting = false 616 | $scope.predict.results = results 617 | alert.success('Performed model prediction.') 618 | }, function() { 619 | $scope.predicting = false 620 | alert.error('Failed to perform model prediction.') 621 | }) 622 | } 623 | 624 | // initialize 625 | $scope.query_pipelines() 626 | }]) 627 | -------------------------------------------------------------------------------- /client/css/style.css: -------------------------------------------------------------------------------- 1 | /* typography rules */ 2 | body { 3 | font-family: monospace; 4 | } 5 | 6 | /* rules for button icons */ 7 | .button-icon { 8 | color: #808080; 9 | } 10 | 11 | .button-icon:hover, 12 | .button-icon:focus { 13 | text-decoration: none; 14 | cursor: pointer; 15 | color: #303030; 16 | } 17 | 18 | /* rules for alerts */ 19 | #alerts { 20 | z-index: 2000; 21 | position: fixed; 22 | bottom: 0px; 23 | left: 20px; 24 | width: 300px; 25 | } 26 | 27 | /* rules for pre text */ 28 | pre { 29 | background-color: #f5f5f5; 30 | } 31 | -------------------------------------------------------------------------------- /client/favicon.ico: -------------------------------------------------------------------------------- 1 | h(    2 |         3 |       4 |        5 |       -------------------------------------------------------------------------------- /client/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nextflow API 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 29 | 30 |
31 | 32 |
33 | 34 |
35 |
36 | 39 |
40 | {{a.header}} 41 | {{a.message}} 42 |
43 |
44 |
45 | 46 | 47 | -------------------------------------------------------------------------------- /client/views/model.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Query Dataset
5 | 6 |
7 |
8 | 9 | Querying pipelines... 10 |
11 |
12 | 13 |
14 |
15 | 16 |
17 | 18 |
19 |
20 | 21 |
22 | 26 |
27 |
28 |
29 | 30 |
31 |
Train Model
32 | 33 |
34 |
35 | 36 |
37 | 38 |
39 |
40 | 41 |
42 | 43 |
44 | 47 |
48 |
49 | 50 |
51 | 52 |
53 | 54 |
55 |
56 | 57 |
58 | 59 |
60 | 61 |
62 |
63 | 64 |
65 | 66 |
67 | 68 |
69 |
70 | 71 |
72 | 73 |
74 | 75 |
76 |
77 | 78 |
79 | 80 |
81 | 84 |
85 |
86 | 87 |
88 | 89 |
90 | 91 |
92 |
93 | 94 |
95 | 96 |
97 | 98 |
99 |
100 | 101 |
102 | 106 |
107 |
108 |
109 | 110 |
111 |
Training Results
112 | 113 |
114 |
115 | 116 |
117 |

{{train.results.mpe | number:3}} %

118 |
119 |
120 | 121 |
122 | 123 |
124 |

{{train.results.cov | number:3}} %

125 |
126 |
127 |
128 | 129 |
130 | 131 |
132 |
133 | 134 |
135 |
Predict
136 | 137 |
138 |
139 | 140 |
141 | 142 |
143 |
144 | 145 |
146 | 147 |
148 | 149 |
150 |
151 | 152 |
153 | 156 |
157 |
158 | 159 |
160 |
161 | 162 |
163 | 164 |
165 |
166 | 167 |
168 |
169 | 170 |
171 | 175 |
176 |
177 |
178 | 179 |
180 |
Prediction Results
181 | 182 |
183 |
{{predict.results | json}}
184 |
185 |
186 |
187 |
188 | -------------------------------------------------------------------------------- /client/views/task.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Task
5 | 6 |
7 |
{{task | json}}
8 |
9 |
10 | 11 |
12 |
Logs
13 | 14 |
15 |
16 | 17 |
18 | 19 |
20 | 21 |

Output Log:

22 |
{{task_out}}
23 | 24 |
25 | 26 |

Error Log:

27 |
{{task_err}}
28 |
29 |
30 |
31 |
32 | -------------------------------------------------------------------------------- /client/views/tasks.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Tasks
5 | 6 |
7 |
8 |
9 | 10 | 11 |
12 |
13 | 14 |
15 |
16 | 20 |
21 |
22 | Download 23 |
24 |
25 |
26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 38 | 39 | 40 | 41 | 42 |
IDRun NameTimeEvent
36 | {{t._id.slice(0, 8)}} 37 | {{t.runName}}{{t.utcTime | date:'short'}}{{t.event}}
43 |
44 |
45 |
46 | -------------------------------------------------------------------------------- /client/views/visualizer.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Query Dataset
5 | 6 |
7 |
8 | 9 | Querying pipelines... 10 |
11 |
12 | 13 |
14 |
15 | 16 |
17 | 18 |
19 |
20 | 21 |
22 | 26 |
27 |
28 |
29 | 30 |
31 |
Visualize
32 | 33 |
34 |
35 | 36 |
37 | 38 |
39 |
40 | 41 |
42 | 43 |
44 | 47 |
48 |
49 | 50 |
51 | 52 |
53 | 54 |
55 |
56 | 57 |
58 | 59 |
60 | 69 |
70 |
71 | 72 |
73 | 74 |
75 | 76 |
77 |
78 | 79 |
80 | 81 |
82 | 85 |
86 |
87 | 88 |
89 | 90 |
91 | 94 |
95 |
96 | 97 |
98 | 99 |
100 | 103 |
104 |
105 | 106 |
107 | 108 |
109 | 112 |
113 |
114 | 115 |
116 | 117 |
118 | 119 |
120 |
121 | 122 |
123 | 124 |
125 | 126 |
127 |
128 | 129 |
130 | 131 |
132 | 133 |
134 |
135 | 136 |
137 | 141 |
142 |
143 | 144 |
145 | 146 |
147 |
148 |
149 |
150 | -------------------------------------------------------------------------------- /client/views/workflow.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Workflow
5 | 6 |
7 |
8 | 9 |
10 |

{{workflow._id}}

11 |
12 |
13 | 14 |
15 | 16 |
17 |

{{workflow.date_created | date:'short'}}

18 |
19 |
20 | 21 |
22 | 23 |
24 |

{{workflow.date_submitted | date:'short'}}

25 |
26 |
27 | 28 |
29 | 30 |
31 | 36 |
37 |
38 | 39 |
40 | 41 |
42 | 49 |
50 |
51 | 52 |
53 | 54 |
55 | 62 |
63 |
64 | 65 |
66 | 67 |
68 | 75 |
76 |
77 | 78 |
79 | 80 |
81 | 88 |
89 |
90 | 91 |
92 | 93 |
94 | 100 |
101 |
102 | 103 |
104 | 105 |
106 | 113 |
114 |
115 | 116 |
117 | 118 |
119 | 126 |
127 |
128 | 129 |
130 | 131 |
132 |

none

133 |

134 | {{f}} 135 |

136 |
137 |
138 | 139 |
140 | 141 |
142 |

none

143 |

144 | {{f}} 145 |

146 |
147 |
148 | 149 |
150 | 151 |
152 |

153 | Download 154 |

155 |
156 |
157 | 158 |
159 | 160 |
161 | 162 | Cancel 163 |
164 |
165 |
166 | 167 |
168 |
Input Data
169 | 170 |
171 |
172 | 173 |
174 |

none

175 |

{{item.file.name}}

176 | 177 | 178 |
179 |
180 | 181 |
182 | 186 |
187 |
188 |
189 | 190 |
191 |
Execution
192 | 193 |
194 |
195 | 196 |
197 |

198 | {{workflow.status}} 199 |

200 |
201 |
202 | 203 |
204 | 205 |
206 |

207 | {{workflow.attempts}} 208 |

209 |
210 |
211 | 212 |
213 | 217 | 218 | 222 | 223 | 227 |
228 | 229 |
230 | 231 |
{{workflow.log}}
232 |
233 |
234 |
235 |
236 | -------------------------------------------------------------------------------- /client/views/workflows.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
Workflows
5 | 6 |
7 | Create workflow 8 |
9 | 10 |
11 |
12 | 13 | 14 |
15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 35 | 38 | 39 |
IDDate CreatedNamePipelineStatus
27 | {{w._id.slice(0, 8)}} 28 | {{w.date_created | date:'short'}}{{w.name}}{{w.pipeline}} 33 | {{w.status}} 34 | 36 | × 37 |
40 |
41 |
42 |
43 | -------------------------------------------------------------------------------- /helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for the nextflow-api 4 | name: nextflow-api 5 | version: 0.1.2 6 | -------------------------------------------------------------------------------- /helm/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Nextflow-API to Kubernetes Using Helm 2 | 3 | This guide assumes you have access to a K8s cluster, and either a valid PVC or storage class on that cluster. 4 | 5 | ## Install Helm 6 | 7 | Follow the [installation instructions](https://helm.sh/docs/intro/install) from the Helm documentation to install Helm. The Helm chart for Nextflow-API is confirmed to work on [Helm v3.0.0-beta3](https://github.com/helm/helm/releases/tag/v3.0.0-beta.3), but it is failing on many newer versions of Helm, so if you have issues deploying Nextflow-API then try using that exact version. 8 | 9 | Helm 3 is used because it does not require installing anything on the K8s cluster, while Helm 2 requires the user to install Tiller. This chart should work with Helm 2 if needed. 10 | 11 | ## Configure Nextflow-API Helm Chart 12 | 13 | The file `values.yaml` contains all of the configurable values for the chart. 14 | 15 | Edit the following sections: 16 | 17 | #### PVC 18 | ``` 19 | # PVC 20 | NewLocalPVC: 21 | # If true, create new PVC on local cluster. 22 | # (temp, future PVCs will be dynamically configurable) 23 | Enabled: true 24 | Name: nextflow-api-local 25 | StorageClass: nfs 26 | Size: 20Gi 27 | 28 | ExistingLocalPVC: 29 | # If true, use existing PVC on local cluster. 30 | # (temp, future PVCs will be dynamically configurable) 31 | Enabled: false 32 | Name: deepgtex-prp 33 | ``` 34 | 35 | If you want to create a new PVC: 36 | 37 | 1. Set `NewLocalPVC` to `true` and `ExistingLocalPVC` to `false` 38 | 2. Change the `Name` to the PVC you have set up on your K8s cluster. 39 | 3. Change the `StorageClass` and `Size` to whatever storage class and size you want to use. 40 | 41 | If you want to use an existing PVC: 42 | 43 | 1. Set `NewLocalPVC` to `false` and `ExistingLocalPVC` to `true` 44 | 2. Change the `Name` to the PVC you have set up on your K8s cluster. 45 | 46 | __TODO__: Remote cluster configuration (disregard and leave `false` for now) 47 | 48 | #### Database and Web Server Deployments 49 | ``` 50 | # Database deployment settings 51 | Database: 52 | # Resource requests and limits per container 53 | Resources: 54 | Requests: 55 | CPU: 4 56 | Memory: 8Gi 57 | Limits: 58 | CPU: 8 59 | Memory: 16Gi 60 | 61 | # Web server deployment settings 62 | WebServer: 63 | # Number of containers 64 | Replicas: 1 65 | # Resource requests and limits per container 66 | Resources: 67 | Requests: 68 | CPU: 1 69 | Memory: 4Gi 70 | Limits: 71 | CPU: 1 72 | Memory: 4Gi 73 | ``` 74 | 75 | Nextflow-API contains a database deployment and a web server deployment, which can optionally include multiple replicas. Note that you must use a `LoadBalancer` in order to have multiple web server replicas. 76 | 77 | #### Ingress / LoadBalancer 78 | ``` 79 | # Ingress control settings 80 | Ingress: 81 | # If true, use ingress control. 82 | # Otherwise, generic LoadBalancer networking will be used, 83 | # and the other settings in this section will be ignored. 84 | Enabled: false 85 | # The subdomain to associate with this service. 
86 | Host: nextflow-api.nautilus.optiputer.net 87 | Class: traefik 88 | ``` 89 | 90 | Nextflow-API will either use an `Ingress` or a `LoadBalancer` to expose itself to the public Internet. 91 | 92 | To use an `Ingress`: 93 | 94 | 1. Set `Enabled` to `true` 95 | 2. Change the `Host` to `nextflow-api.<your-domain>` (e.g. `nextflow-api.scigateway.net`) 96 | 3. Change the `Class` if needed. 97 | 98 | To use a `LoadBalancer`, simply set `Enabled` to `false`. 99 | 100 | Now the Helm chart is configured and ready to deploy! 101 | 102 | ## Deploy Nextflow-API 103 | 104 | Navigate to `nextflow-api/helm`. 105 | 106 | Deploy using `helm install nextflow-api .` 107 | 108 | ## Use Nextflow-API 109 | 110 | #### Give Nextflow the necessary permissions to deploy jobs to your K8s cluster. 111 | ``` 112 | kubectl create rolebinding default-edit --clusterrole=edit --serviceaccount=default:default 113 | kubectl create rolebinding default-view --clusterrole=view --serviceaccount=default:default 114 | ``` 115 | 116 | These commands give the default service account the ability to view and edit cluster resources. Nextflow driver pods use this account to deploy process pods. This creates rolebindings in the `default` namespace. If you are not in the default namespace, use `KUBE_EDITOR="nano" kubectl edit rolebinding <rolebinding-name>`, change the `namespace` field to the one you are using, then save. 117 | 118 | #### Ingress 119 | 120 | If you are using an `Ingress`, simply navigate in your web browser to the `Host` that you specified. 121 | 122 | #### LoadBalancer 123 | 124 | If you are using a `LoadBalancer`: 125 | 126 | 1. Run `kubectl get service` to list the services that are running in your cluster. 127 | 2. Find the service named `nextflow-api` and record the `EXTERNAL-IP`. 128 | 3. Navigate in your web browser to `<EXTERNAL-IP>:8080`. 129 | 130 | All done! Now you can use Nextflow-API to submit and monitor workflows; a quick smoke test using the bundled CLI scripts is sketched at the end of this guide. 131 | 132 | ## Delete Deployment 133 | 134 | To delete the deployment, run `helm uninstall nextflow-api`. 
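Once the web interface is reachable (via the `Ingress` host or `<EXTERNAL-IP>:8080`), the same endpoints can be exercised from the command line with the scripts in the `cli` directory. The sketch below is a minimal smoke test, not part of the chart itself; the server address and the `nextflow-io/hello` pipeline are placeholders, and it assumes the create call returns the new workflow's `_id` in its JSON response (as the web client relies on).

```bash
# placeholder server address; use your Ingress host or LoadBalancer IP
URL="http://<EXTERNAL-IP>:8080"

# create a workflow instance and note the "_id" field in the JSON response
cli/create.sh ${URL} nextflow-io/hello

# launch the instance, then follow its log and download the output archive
ID="<id-from-create-response>"
cli/launch.sh ${URL} ${ID}
cli/log.sh ${URL} ${ID}
cli/download.sh ${URL} ${ID}
```

Each script is a thin `curl` wrapper around the corresponding `/api/workflows/...` route, so any HTTP client can be used instead.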
135 | -------------------------------------------------------------------------------- /helm/gen-secret.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export KUBE_CONFIG=$(cat ~/.kube/config | base64 | tr -d '\n') 4 | 5 | cat > templates/secret.yaml <&2 echo "usage: $0 " 7 | exit 1 8 | fi 9 | 10 | DATABASE="nextflow_api" 11 | DUMP="dump" 12 | BACKUPS="/workspace/_backups" 13 | TYPE="$1" 14 | 15 | # remove existing dump directory 16 | rm -rf ${DUMP} 17 | 18 | # dump database to dump directory 19 | mongodump -d ${DATABASE} -o ${DUMP} 20 | 21 | # create archive of dump directory 22 | tar -czvf $(date +"${BACKUPS}/${TYPE}_%Y_%m_%d.tar.gz") ${DUMP} 23 | 24 | # remove older archives of the same type 25 | NUM_BACKUPS=$(ls ${BACKUPS}/${TYPE}_* | wc -l) 26 | MAX_BACKUPS=10 27 | 28 | if [[ ${NUM_BACKUPS} > ${MAX_BACKUPS} ]]; then 29 | rm -f "$(ls ${BACKUPS}/${TYPE}_* | head -n 1)" 30 | fi 31 | -------------------------------------------------------------------------------- /scripts/db-restore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Restore a database from an archive 3 | 4 | # parse command-line arguments 5 | if [[ $# != 1 ]]; then 6 | >&2 echo "usage: $0 " 7 | exit 1 8 | fi 9 | 10 | ARCHIVE="$1" 11 | DUMP="dump" 12 | DATABASE="nextflow_api" 13 | 14 | # remove existing dump directory 15 | rm -rf ${DUMP} 16 | 17 | # extract archive to dump directory 18 | tar -xvf ${ARCHIVE} 19 | 20 | # restore database from archive 21 | mongorestore --drop --nsInclude ${DATABASE}.* --noIndexRestore ${DUMP} 22 | -------------------------------------------------------------------------------- /scripts/db-startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Startup script for kubernetes deployment. 3 | 4 | # start mongodb service 5 | mkdir -p /data/db 6 | mkdir -p /var/log/mongodb 7 | 8 | mongod \ 9 | --fork \ 10 | --dbpath /data/db \ 11 | --logpath /var/log/mongodb/mongod.log \ 12 | --bind_ip 0.0.0.0 13 | 14 | # initialize backups directory 15 | BACKUPS="/workspace/_backups" 16 | 17 | mkdir -p ${BACKUPS} 18 | 19 | # restore database backup if present 20 | LATEST=$(ls ${BACKUPS} | tail -n 1) 21 | 22 | if [[ ! -z ${LATEST} ]]; then 23 | scripts/db-restore.sh "${BACKUPS}/${LATEST}" 24 | fi 25 | 26 | # create cronjob to backup database daily 27 | echo "00 06 * * * ${PWD}/scripts/db-backup.sh daily" | crontab - 28 | -------------------------------------------------------------------------------- /scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMAGE_NAME="bentsherman/nextflow-api" 4 | 5 | set -ex 6 | 7 | # remove data files 8 | rm -rf _models _trace _workflows .nextflow* db.json db.pkl 9 | 10 | # build docker image 11 | docker build -t ${IMAGE_NAME} . 12 | docker push ${IMAGE_NAME} 13 | 14 | # deploy helm chart to kubernetes cluster 15 | helm uninstall nextflow-api 16 | helm install nextflow-api ./helm 17 | -------------------------------------------------------------------------------- /scripts/kube-cancel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Remove all pods associated with a given workflow run. 
3 | 4 | # parse command-line arguments 5 | if [[ $# != 1 ]]; then 6 | echo "usage: $0 " 7 | exit -1 8 | fi 9 | 10 | RUN_NAME="$1" 11 | 12 | # query list of pods 13 | PODS=`kubectl get pods --output custom-columns=NAME:.metadata.name,RUN:.metadata.labels.runName \ 14 | | grep ${RUN_NAME} \ 15 | | awk '{ print $1 }'` 16 | 17 | # delete pods 18 | if [[ ! -z ${PODS} ]]; then 19 | kubectl delete pods ${PODS} 20 | fi 21 | -------------------------------------------------------------------------------- /scripts/kube-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Configure kubectl to use a given context on startup. 3 | 4 | # parse command-line arguments 5 | if [[ $# != 1 ]]; then 6 | echo "usage: $0 [options]" 7 | exit -1 8 | fi 9 | 10 | PVC_NAME="$1" 11 | ID="$2" 12 | PIPELINE="$3" 13 | 14 | shift 3 15 | OPTIONS="$*" 16 | 17 | POD_NAME="nextflow-api-${ID}" 18 | SPEC_FILE="${POD_NAME}.yaml" 19 | PVC_PATH="/workspace" 20 | 21 | # write pod spec to file 22 | cat > ${SPEC_FILE} < " 7 | exit -1 8 | fi 9 | 10 | ID="$1" 11 | SRC_PATH="$2" 12 | DST_DIRNAME="$(dirname ${SRC_PATH})" 13 | 14 | # replace any links with the original files 15 | for f in $(find ${SRC_PATH} -type l); do 16 | cp --remove-destination $(readlink $f) $f 17 | done 18 | 19 | # copy log file into output folder 20 | cp ${DST_DIRNAME}/.workflow.log ${SRC_PATH}/workflow.log 21 | 22 | # remove old nextflow reports (except for logs) 23 | rm -f ${SRC_PATH}/reports/report.html.* 24 | rm -f ${SRC_PATH}/reports/timeline.html.* 25 | rm -f ${SRC_PATH}/reports/trace.txt.* 26 | 27 | # create archive of output data 28 | cd ${DST_DIRNAME} 29 | 30 | tar -czf "${ID}-output.tar.gz" $(basename ${SRC_PATH})/* 31 | -------------------------------------------------------------------------------- /scripts/startup-local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Startup script for local environment. 3 | 4 | # parse command-line arguments 5 | if [[ $# == 1 ]]; then 6 | BACKEND="$1" 7 | else 8 | echo "usage: $0 " 9 | exit -1 10 | fi 11 | 12 | # initialize environment 13 | source ${HOME}/anaconda3/etc/profile.d/conda.sh 14 | conda activate nextflow-api 15 | 16 | # start mongodb server 17 | if [[ ${BACKEND} == "mongo" ]]; then 18 | sudo service mongodb start 19 | fi 20 | 21 | # start web server 22 | export NXF_EXECUTOR="local" 23 | export TF_CPP_MIN_LOG_LEVEL="3" 24 | 25 | bin/server.py --backend=${BACKEND} 26 | -------------------------------------------------------------------------------- /scripts/startup-nautilus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Startup script for Nautilus/Kubernetes environment. 3 | 4 | # parse command-line arguments 5 | if [[ $# == 1 ]]; then 6 | BACKEND="$1" 7 | elif [[ $# == 2 ]]; then 8 | BACKEND="$1" 9 | KUBE_CONTEXT="$2" 10 | else 11 | echo "usage: $0 [kube-context]" 12 | exit -1 13 | fi 14 | 15 | # start mongodb server 16 | if [[ ${BACKEND} == "mongo" ]]; then 17 | scripts/db-startup.sh 18 | fi 19 | 20 | # configure kubectl context if specified 21 | if [[ ! 
-z ${KUBE_CONTEXT} ]]; then 22 | scripts/kube-config.sh ${KUBE_CONTEXT} 23 | fi 24 | 25 | # start web server 26 | export TF_CPP_MIN_LOG_LEVEL="3" 27 | 28 | bin/server.py --backend=${BACKEND} 29 | -------------------------------------------------------------------------------- /scripts/startup-palmetto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Startup script for Palmetto environment. 3 | 4 | # parse command-line arguments 5 | if [[ $# == 1 ]]; then 6 | BACKEND="$1" 7 | else 8 | echo "usage: $0 " 9 | exit -1 10 | fi 11 | 12 | # load modules 13 | module purge 14 | module load anaconda3/5.1.0-gcc/8.3.1 15 | module load nextflow/20.07.1 16 | 17 | # initialize environment 18 | source activate nextflow-api 19 | 20 | # start mongodb server 21 | if [[ ${BACKEND} == "mongo" ]]; then 22 | killall mongod 23 | 24 | mongod \ 25 | --fork \ 26 | --dbpath /mongo/${USER}/data \ 27 | --logpath /mongo/${USER}/mongod.log \ 28 | --bind_ip_all 29 | fi 30 | 31 | # start web server 32 | export NXF_EXECUTOR="pbspro" 33 | export TF_CPP_MIN_LOG_LEVEL="3" 34 | 35 | bin/server.py --backend=${BACKEND} 36 | --------------------------------------------------------------------------------
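For the task and model endpoints registered in `bin/server.py`, the request bodies mirror the calls in `client/app.js`. The following `curl` sketch is illustrative only: the server address, pipeline, process, and column names are placeholders, and the exact argument sets and response formats are defined by the handlers in `bin/server.py`, which are not reproduced above.

```bash
# placeholder server address and field values; request shapes follow client/app.js
URL="http://localhost:8080"

# list the pipelines that have recorded tasks, then fetch the task records for one of them
curl -s ${URL}/api/tasks/pipelines
curl -s ${URL}/api/tasks/pipelines/<pipeline>

# request a plot of the trace data for one process of that pipeline
# ('xaxis' and 'plot_name' are read by bin/visualizer.py; the handler may fill in other args)
curl -s -X POST \
  -d '{"pipeline": "<pipeline>", "process": "<process>", "args": {"xaxis": "<column>", "plot_name": "<name>"}}' \
  ${URL}/api/tasks/visualize

# predict a target column for a process from user-supplied inputs
curl -s -X POST \
  -d '{"pipeline": "<pipeline>", "process": "<process>", "target": "<column>", "inputs": {"<input-column>": 1.0}}' \
  ${URL}/api/model/predict
```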