├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── bin
    ├── backend.py
    ├── env.py
    ├── model.py
    ├── server.py
    ├── visualizer.py
    └── workflow.py
├── cli
    ├── cancel.sh
    ├── create.sh
    ├── delete.sh
    ├── download.sh
    ├── get.sh
    ├── launch.sh
    ├── log.sh
    ├── query.sh
    └── upload.sh
├── client
    ├── app.js
    ├── css
    │   └── style.css
    ├── favicon.ico
    ├── index.html
    └── views
    │   ├── model.html
    │   ├── task.html
    │   ├── tasks.html
    │   ├── visualizer.html
    │   ├── workflow.html
    │   └── workflows.html
├── helm
    ├── Chart.yaml
    ├── README.md
    ├── gen-secret.sh
    ├── templates
    │   ├── _helpers.tpl
    │   ├── nextflow-api.yaml
    │   └── pvc.yaml
    └── values.yaml
├── requirements.txt
└── scripts
    ├── convert-json-pkl.py
    ├── db-backup.sh
    ├── db-restore.sh
    ├── db-startup.sh
    ├── deploy.sh
    ├── kube-cancel.sh
    ├── kube-config.sh
    ├── kube-run.sh
    ├── kube-save.sh
    ├── startup-local.sh
    ├── startup-nautilus.sh
    └── startup-palmetto.sh


/.gitignore:
--------------------------------------------------------------------------------
 1 | _models
 2 | _trace
 3 | _workflows
 4 | 
 5 | helm/.helmignore
 6 | helm/.nextflow.log
 7 | helm/.sops.yaml
 8 | helm/templates/secret.yaml
 9 | 
10 | .ipynb_checkpoints
11 | *.json
12 | .nextflow*
13 | *.pkl
14 | *.pyc
15 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | ENV NXF_VER="21.04.3"
 5 | EXPOSE 8080
 6 | EXPOSE 27017
 7 | 
 8 | # install package dependencies
 9 | RUN apt-get update -qq \
10 | 	&& apt-get install -qq -y \
11 | 		apt-transport-https \
12 | 		apt-utils \
13 | 		ca-certificates \
14 | 		cron \
15 | 		curl \
16 | 		git \
17 | 		mongodb \
18 | 		openjdk-8-jre \
19 | 		python3.7 \
20 | 		python3-pip \
21 | 		zip
22 | 
23 | # change python to refer to python 3.7
24 | RUN rm /usr/bin/python3 && ln -s python3.7 /usr/bin/python3
25 | 
26 | # install kubectl
27 | RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \
28 | 	&& echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list \
29 | 	&& apt-get update -qq \
30 | 	&& apt-get install -qq -y kubectl
31 | 
32 | # install nextflow
33 | RUN curl -s https://get.nextflow.io | bash \
34 | 	&& mv nextflow /usr/local/bin \
35 | 	&& nextflow info
36 | 
37 | # install nextflow-api from build context
38 | WORKDIR /opt/nextflow-api
39 | 
40 | COPY . .
41 | 
42 | # install python dependencies
43 | RUN python3 -m pip install --upgrade pip
44 | RUN python3 -m pip install -r requirements.txt
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Fan Jiang, Cole McKnight, Benjamin Shealy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Nextflow-API
 2 | 
 3 | Nextflow-API is a web application and REST API for submitting and monitoring Nextflow pipelines on a variety of execution environments. The REST API is implemented in Python using the ([Tornado](https://www.tornadoweb.org/en/stable/)) framework, and the client-side application is implemented using [AngularJS](https://angularjs.org/). Nextflow-API can be deployed locally or to a Kubernetes cluster. There is also experimental support for PBS, and Nextflow-API can be extended to other Nextflow-supported executors upon request.
 4 | 
 5 | ## Deployment
 6 | 
 7 | ### Local
 8 | 
 9 | Install the dependencies as shown in the [Dockerfile](docker/Dockerfile). Depending on your setup, you may not need to install `mongodb` or `kubectl`. You may also prefer to install the Python dependencies in an Anaconda environment:
10 | ```bash
11 | conda create -n nextflow-api python=3.7
12 | conda activate nextflow-api
13 | pip install -r requirements.txt
14 | ```
15 | 
16 | Use `scripts/startup-local.sh` to deploy Nextflow-API locally, although you may need to modify the script to fit your environment.
17 | 
18 | ### Palmetto
19 | 
20 | To use Nexflow-API on the Palmetto cluster, you will need to provision a Login VM, install the Python dependencies in an Anaconda environment, and either request a MongoDB allocation or use the `file` backend. Use `scripts/startup-palmetto.sh` to deploy Nextflow-API, although you may need to modify the script to fit your environment. You will only be able to access the web interface from the campus network or the Clemson VPN. For long-running deployments, run the script within a screen on your Login VM.
21 | 
22 | ### Kubernetes
23 | 
24 | Refer to the [helm](helm/README.md) for instructions on how to deploy Nextflow-API to a Kubernetes cluster.
25 | 
26 | ## Usage
27 | 
28 | The core of Nextflow-API is a REST API which provides an interface to run Nextflow pipelines and can be integrated with third-party services. Nextflow-API provides a collection of [CLI scripts](cli) to demonstrate how to use the API, as well as a web interface for end users.
29 | 
30 | ### Backends
31 | 
32 | Nextflow-API stores workflow runs and tasks in one of several "backend" formats. The `file` backend stores the data in a single `pkl` file, which is ideal for local testing. The `mongo` backend stores the data in a Mongo database, which is ideal for production.
33 | 
34 | ### API Endpoints
35 | 
36 | | Endpoint                       | Method | Description                                 |
37 | |--------------------------------|--------|---------------------------------------------|
38 | | `/api/workflows`               | GET    | List all workflow instances                 |
39 | | `/api/workflows`               | POST   | Create a workflow instance                  |
40 | | `/api/workflows/{id}`          | GET    | Get a workflow instance                     |
41 | | `/api/workflows/{id}`          | POST   | Update a workflow instance                  |
42 | | `/api/workflows/{id}`          | DELETE | Delete a workflow instance                  |
43 | | `/api/workflows/{id}/upload`   | POST   | Upload input files to a workflow instance   |
44 | | `/api/workflows/{id}/launch`   | POST   | Launch a workflow instance                  |
45 | | `/api/workflows/{id}/log`      | GET    | Get the log of a workflow instance          |
46 | | `/api/workflows/{id}/download` | GET    | Download the output data as a tarball       |
47 | | `/api/tasks`                   | GET    | List all tasks                              |
48 | | `/api/tasks`                   | POST   | Save a task (used by Nextflow)              |
49 | 
50 | ### Lifecycle
51 | 
52 | First, the user calls the API to create a workflow instance. Along with the API call, the user must provide the __name of the Nextflow pipeline__. The payload of the API call is shown below.
53 | 
54 | ```json
55 | {
56 |   "pipeline": "systemsgenetics/kinc-nf"
57 | }
58 | ```
59 | 
60 | Then the user uploads the input files (including `nextflow.config`) for the workflow instance.
61 | 
62 | After the input and config files in place, the user can launch the workflow. The launch starts with uploading of the input files to `<id>/input` on the PVC. The jobs running as distributed pods in k8s will read the input data from here, and work together in the dedicated workspace prefixed with `<id>`.
63 | 
64 | Once the workflow is launched, the status and log will be available via the API. Ideally, higher-level services can call the API periodically to fetch the latest log of the workflow instance.
65 | 
66 | After the run is done, the user can call the API to download the output files. The output files are placed in `<id>/output` on the PVC. The API will compress the directory as a `tar.gz` file for downloading.
67 | 
68 | The user can call the API to delete the workflow instance and purge its data once done with it.
69 | 
70 | ### Resource Usage Monitoring and Prediction
71 | 
72 | Nextflow-API automatically collects resource usage data generated by Nextflow, including metrics like runtime, CPU utilization, memory usage, and bytes read/written. Through the web interface you can download this data as CSV files, create visualizations, and train prediction models for specific pipelines and processes. These features were adapted from [tesseract](https://github.com/bentsherman/tesseract), a command-line tool for resource prediction.


--------------------------------------------------------------------------------
/bin/backend.py:
--------------------------------------------------------------------------------
  1 | import motor.motor_tornado
  2 | import multiprocessing as mp
  3 | import pickle
  4 | import pymongo
  5 | 
  6 | 
  7 | 
  8 | class Backend():
  9 | 	def __init__(self):
 10 | 		pass
 11 | 
 12 | 	def initialize(self):
 13 | 		pass
 14 | 
 15 | 	async def workflow_query(self, page, page_size):
 16 | 		raise NotImplementedError()
 17 | 
 18 | 	async def workflow_create(self, workflow):
 19 | 		raise NotImplementedError()
 20 | 
 21 | 	async def workflow_get(self, id):
 22 | 		raise NotImplementedError()
 23 | 
 24 | 	async def workflow_update(self, id, workflow):
 25 | 		raise NotImplementedError()
 26 | 
 27 | 	async def workflow_delete(self, id):
 28 | 		raise NotImplementedError()
 29 | 
 30 | 	async def task_query(self, page, page_size):
 31 | 		raise NotImplementedError()
 32 | 
 33 | 	async def task_create(self, task):
 34 | 		raise NotImplementedError()
 35 | 
 36 | 	async def task_get(self, id):
 37 | 		raise NotImplementedError()
 38 | 
 39 | 
 40 | 
 41 | class FileBackend(Backend):
 42 | 
 43 | 	def __init__(self, url):
 44 | 		self._lock = mp.Lock()
 45 | 		self._url = url
 46 | 		self.initialize()
 47 | 
 48 | 	def initialize(self, error_not_found=False):
 49 | 		# load database from pickle file
 50 | 		try:
 51 | 			self.load()
 52 | 
 53 | 		# initialize empty database if pickle file doesn't exist
 54 | 		except FileNotFoundError:
 55 | 			self._db = {
 56 | 				'workflows': [],
 57 | 				'tasks': []
 58 | 			}
 59 | 			self.save()
 60 | 
 61 | 	def load(self):
 62 | 		self._db = pickle.load(open(self._url, 'rb'))
 63 | 
 64 | 	def save(self):
 65 | 		pickle.dump(self._db, open(self._url, 'wb'))
 66 | 
 67 | 	async def workflow_query(self, page, page_size):
 68 | 		self._lock.acquire()
 69 | 		self.load()
 70 | 
 71 | 		# sort workflows by date_created in descending order
 72 | 		self._db['workflows'].sort(key=lambda w: w['date_created'], reverse=True)
 73 | 
 74 | 		# return the specified page of workflows
 75 | 		workflows = self._db['workflows'][(page * page_size) : ((page + 1) * page_size)]
 76 | 
 77 | 		self._lock.release()
 78 | 
 79 | 		return workflows
 80 | 
 81 | 	async def workflow_create(self, workflow):
 82 | 		self._lock.acquire()
 83 | 		self.load()
 84 | 
 85 | 		# append workflow to list of workflows
 86 | 		self._db['workflows'].append(workflow)
 87 | 
 88 | 		self.save()
 89 | 		self._lock.release()
 90 | 
 91 | 	async def workflow_get(self, id):
 92 | 		self._lock.acquire()
 93 | 		self.load()
 94 | 
 95 | 		# search for workflow by id
 96 | 		workflow = None
 97 | 
 98 | 		for w in self._db['workflows']:
 99 | 			if w['_id'] == id:
100 | 				workflow = w
101 | 				break
102 | 
103 | 		self._lock.release()
104 | 
105 | 		# return workflow or raise error if workflow wasn't found
106 | 		if workflow != None:
107 | 			return workflow
108 | 		else:
109 | 			raise IndexError('Workflow was not found')
110 | 
111 | 	async def workflow_update(self, id, workflow):
112 | 		self._lock.acquire()
113 | 		self.load()
114 | 
115 | 		# search for workflow by id and update it
116 | 		found = False
117 | 
118 | 		for i, w in enumerate(self._db['workflows']):
119 | 			if w['_id'] == id:
120 | 				# update workflow
121 | 				self._db['workflows'][i] = workflow
122 | 				found = True
123 | 				break
124 | 
125 | 		self.save()
126 | 		self._lock.release()
127 | 
128 | 		# raise error if workflow wasn't found
129 | 		if not found:
130 | 			raise IndexError('Workflow was not found')
131 | 
132 | 	async def workflow_delete(self, id):
133 | 		self._lock.acquire()
134 | 		self.load()
135 | 
136 | 		# search for workflow by id and delete it
137 | 		found = False
138 | 
139 | 		for i, w in enumerate(self._db['workflows']):
140 | 			if w['_id'] == id:
141 | 				# delete workflow
142 | 				self._db['workflows'].pop(i)
143 | 				found = True
144 | 				break
145 | 
146 | 		self.save()
147 | 		self._lock.release()
148 | 
149 | 		# raise error if workflow wasn't found
150 | 		if not found:
151 | 			raise IndexError('Workflow was not found')
152 | 
153 | 	async def task_query(self, page, page_size):
154 | 		self._lock.acquire()
155 | 		self.load()
156 | 
157 | 		# sort tasks by date_created in descending order
158 | 		self._db['tasks'].sort(key=lambda t: t['utcTime'], reverse=True)
159 | 
160 | 		# return the specified page of workflows
161 | 		tasks = self._db['tasks'][(page * page_size) : ((page + 1) * page_size)]
162 | 
163 | 		self._lock.release()
164 | 
165 | 		return tasks
166 | 
167 | 	async def task_query_pipelines(self):
168 | 		self._lock.acquire()
169 | 		self.load()
170 | 
171 | 		# extract list of unique pipelines from all 'started' events
172 | 		pipelines = [t['metadata']['workflow']['projectName'] for t in self._db['tasks'] if t['event'] == 'started']
173 | 		pipelines = list(set(pipelines))
174 | 
175 | 		self._lock.release()
176 | 
177 | 		return pipelines
178 | 
179 | 	async def task_query_pipeline(self, pipeline):
180 | 		self._lock.acquire()
181 | 		self.load()
182 | 
183 | 		# find all runs of the given pipeline
184 | 		run_ids = [t['runId'] for t in self._db['tasks'] if t['event'] == 'started' and t['metadata']['workflow']['projectName'] == pipeline]
185 | 
186 | 		# find all tasks associated with the given runs
187 | 		tasks = [t for t in self._db['tasks'] if t['event'] == 'process_completed' and t['runId'] in run_ids]
188 | 
189 | 		self._lock.release()
190 | 
191 | 		return tasks
192 | 
193 | 	async def task_create(self, task):
194 | 		self._lock.acquire()
195 | 		self.load()
196 | 
197 | 		# append workflow to list of workflows
198 | 		self._db['tasks'].append(task)
199 | 
200 | 		self.save()
201 | 		self._lock.release()
202 | 
203 | 	async def task_get(self, id):
204 | 		self._lock.acquire()
205 | 		self.load()
206 | 
207 | 		# search for task by id
208 | 		task = None
209 | 
210 | 		for t in self._db['tasks']:
211 | 			if t['_id'] == id:
212 | 				task = t
213 | 				break
214 | 
215 | 		self._lock.release()
216 | 
217 | 		# raise error if task wasn't found
218 | 		if task != None:
219 | 			return task
220 | 		else:
221 | 			raise IndexError('Task was not found')
222 | 
223 | 
224 | 
225 | class MongoBackend(Backend):
226 | 	def __init__(self, url):
227 | 		self._url = url
228 | 		self.initialize()
229 | 
230 | 	def initialize(self):
231 | 		self._client = motor.motor_tornado.MotorClient(self._url)
232 | 		self._db = self._client['nextflow_api']
233 | 
234 | 	async def workflow_query(self, page, page_size):
235 | 		return await self._db.workflows \
236 | 			.find() \
237 | 			.sort('date_created', pymongo.DESCENDING) \
238 | 			.skip(page * page_size) \
239 | 			.to_list(length=page_size)
240 | 
241 | 	async def workflow_create(self, workflow):
242 | 		return await self._db.workflows.insert_one(workflow)
243 | 
244 | 	async def workflow_get(self, id):
245 | 		return await self._db.workflows.find_one({ '_id': id })
246 | 
247 | 	async def workflow_update(self, id, workflow):
248 | 		return await self._db.workflows.replace_one({ '_id': id }, workflow)
249 | 
250 | 	async def workflow_delete(self, id):
251 | 		return await self._db.workflows.delete_one({ '_id': id })
252 | 
253 | 	async def task_query(self, page, page_size):
254 | 		return await self._db.tasks \
255 | 			.find({}, { '_id': 1, 'runName': 1, 'utcTime': 1, 'event': 1 }) \
256 | 			.sort('utcTime', pymongo.DESCENDING) \
257 | 			.skip(page * page_size) \
258 | 			.to_list(length=page_size)
259 | 
260 | 	async def task_query_pipelines(self):
261 | 		# find all 'started' events
262 | 		tasks = await self._db.tasks \
263 | 			.find({ 'event': 'started' }, { 'metadata.workflow.projectName': 1 }) \
264 | 			.to_list(length=None)
265 | 
266 | 		# extract list of unique pipelines
267 | 		pipelines = [t['metadata']['workflow']['projectName'] for t in tasks]
268 | 		pipelines = list(set(pipelines))
269 | 
270 | 		return pipelines
271 | 
272 | 	async def task_query_pipeline(self, pipeline):
273 | 		# find all runs of the given pipeline
274 | 		runs = await self._db.tasks \
275 | 			.find({ 'event': 'started', 'metadata.workflow.projectName': pipeline }, { 'runId': 1 }) \
276 | 			.to_list(length=None)
277 | 
278 | 		run_ids = [run['runId'] for run in runs]
279 | 
280 | 		# find all tasks associated with the given runs
281 | 		tasks = await self._db.tasks \
282 | 			.find({ 'event': 'process_completed', 'runId': { '$in': run_ids } }) \
283 | 			.to_list(length=None)
284 | 
285 | 		return tasks
286 | 
287 | 	async def task_create(self, task):
288 | 		return await self._db.tasks.insert_one(task)
289 | 
290 | 	async def task_get(self, id):
291 | 		return await self._db.tasks.find_one({ '_id': id })
292 | 


--------------------------------------------------------------------------------
/bin/env.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # load settings from environment variables
 4 | NXF_EXECUTOR = os.environ.get('NXF_EXECUTOR', default='local')
 5 | PVC_NAME = os.environ.get('PVC_NAME')
 6 | 
 7 | # define working directories
 8 | BASE_DIRS = {
 9 | 	'k8s':    '/workspace',
10 | 	'local':  '.',
11 | 	'pbspro': '.'
12 | }
13 | BASE_DIR = BASE_DIRS[NXF_EXECUTOR]
14 | 
15 | MODELS_DIR = os.path.join(BASE_DIR, '_models')
16 | TRACE_DIR = os.path.join(BASE_DIR, '_trace')
17 | WORKFLOWS_DIR = os.path.join(BASE_DIR, '_workflows')
18 | 
19 | # validate environment settings
20 | if NXF_EXECUTOR == 'k8s' and PVC_NAME is None:
21 | 	raise EnvironmentError('Using k8s executor but PVC is not defined')


--------------------------------------------------------------------------------
/bin/model.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import dill as pickle
  3 | import forestci
  4 | import h5py
  5 | import io
  6 | import json
  7 | import numpy as np
  8 | import pandas as pd
  9 | import scipy.stats
 10 | import sklearn.base
 11 | import sklearn.dummy
 12 | import sklearn.ensemble
 13 | import sklearn.metrics
 14 | import sklearn.model_selection
 15 | import sklearn.pipeline
 16 | import sklearn.preprocessing
 17 | from tensorflow import keras
 18 | 
 19 | import env
 20 | 
 21 | 
 22 | 
 23 | def check_std(y_pred):
 24 | 	if isinstance(y_pred, tuple):
 25 | 		return y_pred
 26 | 	else:
 27 | 		return y_pred, np.zeros_like(y_pred)
 28 | 
 29 | 
 30 | 
 31 | def predict_intervals(y_bar, y_std, ci=0.95):
 32 | 	# compute z score
 33 | 	_, n_stds = scipy.stats.norm.interval(ci)
 34 | 
 35 | 	# compute intervals
 36 | 	y_lower = y_bar - n_stds * y_std
 37 | 	y_upper = y_bar + n_stds * y_std
 38 | 
 39 | 	return y_lower, y_upper
 40 | 
 41 | 
 42 | 
 43 | class KerasRegressor(keras.wrappers.scikit_learn.KerasRegressor):
 44 | 
 45 | 	def __getstate__(self):
 46 | 		state = self.__dict__
 47 | 		if 'model' in state:
 48 | 			model = state['model']
 49 | 			model_hdf5_bio = io.BytesIO()
 50 | 			with h5py.File(model_hdf5_bio, mode='w') as file:
 51 | 				model.save(file)
 52 | 			state['model'] = model_hdf5_bio
 53 | 			state_copy = copy.deepcopy(state)
 54 | 			state['model'] = model
 55 | 			return state_copy
 56 | 		else:
 57 | 			return state
 58 | 
 59 | 	def __setstate__(self, state):
 60 | 		if 'model' in state:
 61 | 			model_hdf5_bio = state['model']
 62 | 			with h5py.File(model_hdf5_bio, mode='r') as file:
 63 | 				state['model'] = keras.models.load_model(file)
 64 | 		self.__dict__ = state
 65 | 
 66 | 	def predict(self, x):
 67 | 		return np.squeeze(self.model(x))
 68 | 
 69 | 
 70 | 
 71 | class KerasRegressorWithIntervals(KerasRegressor):
 72 | 
 73 | 	def inverse_tau(self, N, lmbda=1e-5, p_dropout=0.1, ls_2=0.005):
 74 | 		return (2 * N * lmbda) / (1 - p_dropout) / ls_2
 75 | 
 76 | 	def fit(self, X, y):
 77 | 		# fit neural network
 78 | 		history = super(KerasRegressorWithIntervals, self).fit(X, y)
 79 | 
 80 | 		# save training set size for tau adjustment
 81 | 		self.n_train_samples = X.shape[0]
 82 | 
 83 | 		return history
 84 | 
 85 | 	def predict(self, X, n_preds=10):
 86 | 		# compute several predictions for each sample
 87 | 		y_preds = np.array([super(KerasRegressorWithIntervals, self).predict(X) for _ in range(n_preds)])
 88 | 
 89 | 		# compute tau adjustment
 90 | 		tau_inv = self.inverse_tau(self.n_train_samples)
 91 | 
 92 | 		# compute mean and variance
 93 | 		y_bar = np.mean(y_preds, axis=0)
 94 | 		y_std = np.std(y_preds, axis=0) + tau_inv
 95 | 
 96 | 		return y_bar, y_std
 97 | 
 98 | 
 99 | 
100 | class RandomForestRegressorWithIntervals(sklearn.ensemble.RandomForestRegressor):
101 | 
102 | 	def fit(self, X, y):
103 | 		# fit random forest
104 | 		super(RandomForestRegressorWithIntervals, self).fit(X, y)
105 | 
106 | 		# save training set for variance estimate
107 | 		self.X_train = X
108 | 
109 | 		return self
110 | 
111 | 	def predict(self, X):
112 | 		# compute predictions
113 | 		y_bar = super(RandomForestRegressorWithIntervals, self).predict(X)
114 | 
115 | 		# compute variance estimate
116 | 		y_var = forestci.random_forest_error(self, self.X_train, X)
117 | 		y_std = np.sqrt(y_var)
118 | 
119 | 		return y_bar, y_std
120 | 
121 | 
122 | 
123 | def select_rows_by_values(df, column, values):
124 | 	return pd.concat([df[df[column].astype(str) == v] for v in values])
125 | 
126 | 
127 | 
128 | def is_categorical(df, column):
129 | 	return column != None and df[column].dtype.kind in 'OSUV'
130 | 
131 | 
132 | 
133 | def create_dataset(df, inputs, target=None):
134 | 	# extract input/target data from trace data
135 | 	X = df[inputs]
136 | 	y = df[target].values if target != None else None
137 | 
138 | 	# one-hot encode categorical inputs, save categories
139 | 	options = {column: None for column in inputs}
140 | 
141 | 	for column in inputs:
142 | 		if is_categorical(X, column):
143 | 			options[column] = X[column].unique().tolist()
144 | 			X = pd.get_dummies(X, columns=[column], drop_first=False)
145 | 
146 | 	# save column order
147 | 	columns = list(X.columns)
148 | 
149 | 	return X.values, y, columns, options
150 | 
151 | 
152 | 
153 | def create_dummy():
154 | 	return sklearn.dummy.DummyRegressor(strategy='quantile', quantile=1.0)
155 | 
156 | 
157 | 
158 | def create_mlp(
159 | 	input_shape,
160 | 	hidden_layer_sizes=[],
161 | 	activation='relu',
162 | 	activation_target=None,
163 | 	l1=0,
164 | 	l2=1e-5,
165 | 	p_dropout=0.1,
166 | 	intervals=False,
167 | 	optimizer='adam', # lr=0.001
168 | 	loss='mean_absolute_error',
169 | 	epochs=200):
170 | 
171 | 	def build_fn():
172 | 		# create a 3-layer neural network
173 | 		x_input = keras.Input(shape=input_shape)
174 | 
175 | 		x = x_input
176 | 		for units in hidden_layer_sizes:
177 | 			x = keras.layers.Dense(
178 | 				units=units,
179 | 				activation=activation,
180 | 				kernel_regularizer=keras.regularizers.l1_l2(l1, l2),
181 | 				bias_regularizer=keras.regularizers.l1_l2(l1, l2)
182 | 			)(x)
183 | 
184 | 			if p_dropout != None:
185 | 				training = True if intervals else None
186 | 				x = keras.layers.Dropout(p_dropout)(x, training=training)
187 | 
188 | 		y_output = keras.layers.Dense(units=1, activation=activation_target)(x)
189 | 
190 | 		mlp = keras.models.Model(x_input, y_output)
191 | 
192 | 		# compile the model
193 | 		mlp.compile(optimizer=optimizer, loss=loss)
194 | 
195 | 		return mlp
196 | 
197 | 	if intervals:
198 | 		Regressor = KerasRegressorWithIntervals
199 | 	else:
200 | 		Regressor = KerasRegressor
201 | 
202 | 	return Regressor(
203 | 		build_fn=build_fn,
204 | 		batch_size=32,
205 | 		epochs=epochs,
206 | 		verbose=False,
207 | 		validation_split=0.1
208 | 	)
209 | 
210 | 
211 | 
212 | def create_rf(criterion='mae', intervals=False):
213 | 	if intervals:
214 | 		Regressor = RandomForestRegressorWithIntervals
215 | 	else:
216 | 		Regressor = sklearn.ensemble.RandomForestRegressor
217 | 
218 | 	return Regressor(n_estimators=100, criterion=criterion)
219 | 
220 | 
221 | 
222 | def create_pipeline(reg, scaler_fn=sklearn.preprocessing.MaxAbsScaler):
223 | 	return sklearn.pipeline.Pipeline([
224 | 		('scaler', scaler_fn()),
225 | 		('reg', reg)
226 | 	])
227 | 
228 | 
229 | 
230 | def mean_absolute_percentage_error(y_true, y_pred):
231 | 	y_true = np.array(y_true)
232 | 	y_pred = np.array(y_pred)
233 | 	return 100 * np.mean(np.abs((y_true - y_pred) / y_true))
234 | 
235 | 
236 | 
237 | def prediction_interval_coverage(y_true, y_lower, y_upper):
238 | 	return 100 * np.mean((y_lower <= y_true) & (y_true <= y_upper))
239 | 
240 | 
241 | 
242 | def evaluate_cv(model, X, y, cv=5, ci=0.95):
243 | 	# initialize prediction arrays
244 | 	y_bar = np.empty_like(y)
245 | 	y_std = np.empty_like(y)
246 | 
247 | 	# perform k-fold cross validation
248 | 	kfold = sklearn.model_selection.KFold(n_splits=cv, shuffle=True)
249 | 
250 | 	for train_index, test_index in kfold.split(X):
251 | 		# reset session (for keras models)
252 | 		keras.backend.clear_session()
253 | 
254 | 		# extract train/test split
255 | 		X_train, X_test = X[train_index], X[test_index]
256 | 		y_train, y_test = y[train_index], y[test_index]
257 | 
258 | 		# train model
259 | 		model_ = sklearn.base.clone(model)
260 | 		model_.fit(X_train, y_train)
261 | 
262 | 		# get model predictions
263 | 		y_bar_i, y_std_i = check_std(model_.predict(X_test))
264 | 
265 | 		y_bar[test_index] = y_bar_i
266 | 		y_std[test_index] = y_std_i
267 | 
268 | 	# compute prediction intervals
269 | 	y_lower, y_upper = predict_intervals(y_bar, y_std, ci=ci)
270 | 
271 | 	# evaluate predictions
272 | 	scores = {
273 | 		'mpe': mean_absolute_percentage_error(y, y_bar),
274 | 		'cov': prediction_interval_coverage(y, y_lower, y_upper)
275 | 	}
276 | 
277 | 	return scores, y_bar, y_std
278 | 
279 | 
280 | 
281 | def train(df, args):
282 | 	defaults = {
283 | 		'selectors': [],
284 | 		'min_std': 0.1,
285 | 		'scaler': 'maxabs',
286 | 		'model_type': 'mlp',
287 | 		'hidden_layer_sizes': [128, 128, 128],
288 | 		'epochs': 200,
289 | 		'intervals': True
290 | 	}
291 | 
292 | 	args = {**defaults, **args}
293 | 
294 | 	# apply selectorss to dataframe
295 | 	for selector in args['selectors']:
296 | 		# parse column and selected values
297 | 		column, values = selector.split('=')
298 | 		values = values.split(',')
299 | 
300 | 		# select rows from dataframe
301 | 		if values != None and len(values) > 0:
302 | 			df = select_rows_by_values(df, column, values)
303 | 
304 | 	# extract input/output data from trace data
305 | 	try:
306 | 		X, y, columns, options = create_dataset(df, args['inputs'], args['target'])
307 | 	except:
308 | 		raise RuntimeError('error: one or more input/output variables are not in the dataset')
309 | 
310 | 	# select scaler
311 | 	try:
312 | 		scalers = {
313 | 			'maxabs': sklearn.preprocessing.MaxAbsScaler,
314 | 			'minmax': sklearn.preprocessing.MinMaxScaler,
315 | 			'standard': sklearn.preprocessing.StandardScaler
316 | 		}
317 | 		Scaler = scalers[args['scaler']]
318 | 	except:
319 | 		raise RuntimeError('error: scaler %s not recognized' % (args['scaler']))
320 | 
321 | 	# use dummy regressor if target data has low variance
322 | 	if y.std() < args['min_std']:
323 | 		print('target value has low variance, using max value rounded up')
324 | 		model_type = 'dummy'
325 | 	else:
326 | 		model_type = args['model_type']
327 | 
328 | 	# create regressor
329 | 	if model_type == 'dummy':
330 | 		reg = create_dummy()
331 | 
332 | 	elif model_type == 'mlp':
333 | 		reg = create_mlp(
334 | 			X.shape[1],
335 | 			hidden_layer_sizes=args['hidden_layer_sizes'],
336 | 			epochs=args['epochs'],
337 | 			intervals=args['intervals'])
338 | 
339 | 	elif model_type == 'rf':
340 | 		reg = create_rf(intervals=args['intervals'])
341 | 
342 | 	# create model
343 | 	model = create_pipeline(reg, scaler_fn=Scaler)
344 | 
345 | 	# save order of input columns
346 | 	args['inputs'] = options
347 | 	args['columns'] = columns
348 | 
349 | 	# train and evaluate model
350 | 	scores, _, _ = evaluate_cv(model, X, y)
351 | 
352 | 	# train model on full dataset
353 | 	model.fit(X, y)
354 | 
355 | 	# workaround for keras models
356 | 	try:
357 | 		model.named_steps['regressor'].build_fn = None
358 | 	except:
359 | 		pass
360 | 
361 | 	# save model to file
362 | 	f = open('%s/%s.pkl' % (env.MODELS_DIR, args['model_name']), 'wb')
363 | 	pickle.dump(model, f)
364 | 
365 | 	# save args to file
366 | 	f = open('%s/%s.json' % (env.MODELS_DIR, args['model_name']), 'w')
367 | 	json.dump(args, f)
368 | 
369 | 	# return results
370 | 	y_bar, y_std = check_std(model.predict(X))
371 | 
372 | 	return {
373 | 		'y_true': y,
374 | 		'y_pred': y_bar,
375 | 		'mpe': scores['mpe'],
376 | 		'cov': scores['cov']
377 | 	}
378 | 
379 | 
380 | 
381 | def predict(model_name, inputs, ci=0.95):
382 | 	# load model
383 | 	f = open('%s/%s.pkl' % (env.MODELS_DIR, model_name), 'rb')
384 | 	model = pickle.load(f)
385 | 
386 | 	# load model configuration
387 | 	f = open('%s/%s.json' % (env.MODELS_DIR, model_name), 'r')
388 | 	args = json.load(f)
389 | 
390 | 	# convert inputs into an ordered vector
391 | 	x_input = {}
392 | 
393 | 	for column, options in args['inputs'].items():
394 | 		# one-hot encode categorical inputs
395 | 		if options != None:
396 | 			for v in options:
397 | 				x_input['%s_%s' % (column, v)] = (inputs[column] == v)
398 | 
399 | 		# copy numerical inputs directly
400 | 		else:
401 | 			x_input[column] = inputs[column]
402 | 
403 | 	x_input = [float(x_input[c]) for c in args['columns']]
404 | 
405 | 	# perform inference
406 | 	X = np.array([x_input])
407 | 	y_bar, y_std = check_std(model.predict(X))
408 | 	y_lower, y_upper = predict_intervals(y_bar, y_std, ci=ci)
409 | 
410 | 	# return results
411 | 	return {
412 | 		args['target']: [float(y_lower), float(y_bar), float(y_upper)]
413 | 	}


--------------------------------------------------------------------------------
/bin/server.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import base64
  4 | import bson
  5 | import json
  6 | import multiprocessing as mp
  7 | import os
  8 | import pandas as pd
  9 | import shutil
 10 | import socket
 11 | import subprocess
 12 | import time
 13 | import tornado
 14 | import tornado.escape
 15 | import tornado.httpserver
 16 | import tornado.ioloop
 17 | import tornado.options
 18 | import tornado.web
 19 | 
 20 | import backend
 21 | import env
 22 | import model as Model
 23 | import visualizer as Visualizer
 24 | import workflow as Workflow
 25 | 
 26 | 
 27 | 
 28 | def list_dir_recursive(path, relpath_start=''):
 29 | 	files = [os.path.join(dir, f) for (dir, subdirs, filenames) in os.walk(path) for f in filenames]
 30 | 	files = [os.path.relpath(f, start=relpath_start) for f in files]
 31 | 	files.sort()
 32 | 
 33 | 	return files
 34 | 
 35 | 
 36 | 
 37 | def message(status, message):
 38 | 	return {
 39 | 		'status': status,
 40 | 		'message': message
 41 | 	}
 42 | 
 43 | 
 44 | 
 45 | class WorkflowQueryHandler(tornado.web.RequestHandler):
 46 | 
 47 | 	async def get(self):
 48 | 		page = int(self.get_query_argument('page', 0))
 49 | 		page_size = int(self.get_query_argument('page_size', 100))
 50 | 
 51 | 		db = self.settings['db']
 52 | 		workflows = await db.workflow_query(page, page_size)
 53 | 
 54 | 		self.set_status(200)
 55 | 		self.set_header('content-type', 'application/json')
 56 | 		self.write(tornado.escape.json_encode(workflows))
 57 | 
 58 | 
 59 | 
 60 | class WorkflowCreateHandler(tornado.web.RequestHandler):
 61 | 
 62 | 	REQUIRED_KEYS = set([
 63 | 		'pipeline'
 64 | 	])
 65 | 
 66 | 	DEFAULTS = {
 67 | 		'name': '',
 68 | 		'params_format': '',
 69 | 		'params_data': '',
 70 | 		'profiles': 'standard',
 71 | 		'revision': 'master',
 72 | 		'input_dir': 'input',
 73 | 		'output_dir': 'output',
 74 | 		'attempts': 0
 75 | 	}
 76 | 
 77 | 	def get(self):
 78 | 		workflow = {**self.DEFAULTS, **{ '_id': '0' }}
 79 | 
 80 | 		self.set_status(200)
 81 | 		self.set_header('content-type', 'application/json')
 82 | 		self.write(tornado.escape.json_encode(workflow))
 83 | 
 84 | 	async def post(self):
 85 | 		db = self.settings['db']
 86 | 
 87 | 		# make sure request body is valid
 88 | 		try:
 89 | 			data = tornado.escape.json_decode(self.request.body)
 90 | 			missing_keys = self.REQUIRED_KEYS - data.keys()
 91 | 		except json.JSONDecodeError:
 92 | 			self.set_status(422)
 93 | 			self.write(message(422, 'Ill-formatted JSON'))
 94 | 			return
 95 | 
 96 | 		if missing_keys:
 97 | 			self.set_status(400)
 98 | 			self.write(message(400, 'Missing required field(s): %s' % list(missing_keys)))
 99 | 			return
100 | 
101 | 		# create workflow
102 | 		workflow = {**self.DEFAULTS, **data, **{ 'status': 'nascent' }}
103 | 		workflow['_id'] = str(bson.ObjectId())
104 | 
105 | 		# append creation timestamp to workflow
106 | 		workflow['date_created'] = int(time.time() * 1000)
107 | 
108 | 		# transform pipeline name to lowercase
109 | 		workflow['pipeline'] = workflow['pipeline'].lower() 
110 | 
111 | 		# save workflow
112 | 		await db.workflow_create(workflow)
113 | 
114 | 		# create workflow directory
115 | 		workflow_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id'])
116 | 		os.makedirs(workflow_dir)
117 | 
118 | 		self.set_status(200)
119 | 		self.set_header('content-type', 'application/json')
120 | 		self.write(tornado.escape.json_encode({ '_id': workflow['_id'] }))
121 | 
122 | 
123 | 
124 | 
125 | class WorkflowEditHandler(tornado.web.RequestHandler):
126 | 
127 | 	REQUIRED_KEYS = set([
128 | 		'pipeline'
129 | 	])
130 | 
131 | 	DEFAULTS = {
132 | 		'name': '',
133 | 		'params_format': '',
134 | 		'params_data': '',
135 | 		'profiles': 'standard',
136 | 		'revision': 'master',
137 | 		'input_dir': 'input',
138 | 		'output_dir': 'output',
139 | 		'attempts': 0
140 | 	}
141 | 
142 | 	async def get(self, id):
143 | 		db = self.settings['db']
144 | 
145 | 		try:
146 | 			# get workflow
147 | 			workflow = await db.workflow_get(id)
148 | 
149 | 			# append list of input files
150 | 			workflow_dir = os.path.join(env.WORKFLOWS_DIR, id)
151 | 			input_dir = os.path.join(workflow_dir, workflow['input_dir'])
152 | 			output_dir = os.path.join(workflow_dir, workflow['output_dir'])
153 | 
154 | 			if os.path.exists(input_dir):
155 | 				workflow['input_files'] = list_dir_recursive(input_dir, relpath_start=workflow_dir)
156 | 			else:
157 | 				workflow['input_files'] = []
158 | 
159 | 			# append list of output files
160 | 			if os.path.exists(output_dir):
161 | 				workflow['output_files'] = list_dir_recursive(output_dir, relpath_start=workflow_dir)
162 | 			else:
163 | 				workflow['output_files'] = []
164 | 
165 | 			# append status of output data
166 | 			workflow['output_data'] = os.path.exists('%s/%s-output.tar.gz' % (workflow_dir, id))
167 | 
168 | 			self.set_status(200)
169 | 			self.set_header('content-type', 'application/json')
170 | 			self.write(tornado.escape.json_encode(workflow))
171 | 		except:
172 | 			self.set_status(404)
173 | 			self.write(message(404, 'Failed to get workflow \"%s\"' % id))
174 | 
175 | 	async def post(self, id):
176 | 		db = self.settings['db']
177 | 
178 | 		# make sure request body is valid
179 | 		try:
180 | 			data = tornado.escape.json_decode(self.request.body)
181 | 			missing_keys = self.REQUIRED_KEYS - data.keys()
182 | 		except json.JSONDecodeError:
183 | 			self.set_status(422)
184 | 			self.write(message(422, 'Ill-formatted JSON'))
185 | 
186 | 		if missing_keys:
187 | 			self.set_status(400)
188 | 			self.write(message(400, 'Missing required field(s): %s' % list(missing_keys)))
189 | 			return
190 | 
191 | 		try:
192 | 			# update workflow from request body
193 | 			workflow = await db.workflow_get(id)
194 | 			workflow = {**self.DEFAULTS, **workflow, **data}
195 | 
196 | 			# transform pipeline name to lowercase
197 | 			workflow['pipeline'] = workflow['pipeline'].lower() 
198 | 
199 | 			# save workflow
200 | 			await db.workflow_update(id, workflow)
201 | 
202 | 			self.set_status(200)
203 | 			self.set_header('content-type', 'application/json')
204 | 			self.write(tornado.escape.json_encode({ '_id': id }))
205 | 		except:
206 | 			self.set_status(404)
207 | 			self.write(message(404, 'Failed to update workflow \"%s\"' % id))
208 | 
209 | 	async def delete(self, id):
210 | 		db = self.settings['db']
211 | 
212 | 		try:
213 | 			# delete workflow
214 | 			await db.workflow_delete(id)
215 | 
216 | 			# delete workflow directory
217 | 			shutil.rmtree(os.path.join(env.WORKFLOWS_DIR, id), ignore_errors=True)
218 | 
219 | 			self.set_status(200)
220 | 			self.write(message(200, 'Workflow \"%s\" was deleted' % id))
221 | 		except:
222 | 			self.set_status(404)
223 | 			self.write(message(404, 'Failed to delete workflow \"%s\"' % id))
224 | 
225 | 
226 | 
227 | 
228 | class WorkflowUploadHandler(tornado.web.RequestHandler):
229 | 
230 | 	async def post(self, id):
231 | 		db = self.settings['db']
232 | 
233 | 		# make sure request body contains files
234 | 		files = self.request.files
235 | 
236 | 		if not files:
237 | 			self.set_status(400)
238 | 			self.write(message(400, 'No files were uploaded'))
239 | 			return
240 | 
241 | 		# get workflow
242 | 		workflow = await db.workflow_get(id)
243 | 
244 | 		# initialize input directory
245 | 		input_dir = os.path.join(env.WORKFLOWS_DIR, id, workflow['input_dir'])
246 | 		os.makedirs(input_dir, exist_ok=True)
247 | 
248 | 		# save uploaded files to input directory
249 | 		filenames = []
250 | 
251 | 		for f_list in files.values():
252 | 			for f_arg in f_list:
253 | 				filename, body = f_arg['filename'], f_arg['body']
254 | 				with open(os.path.join(input_dir, filename), 'wb') as f:
255 | 					f.write(body)
256 | 				filenames.append(filename)
257 | 
258 | 		self.set_status(200)
259 | 		self.write(message(200, 'File \"%s\" was uploaded for workflow \"%s\" successfully' % (filenames, id)))
260 | 
261 | 
262 | 
263 | class WorkflowLaunchHandler(tornado.web.RequestHandler):
264 | 
265 | 	resume = False
266 | 
267 | 	async def post(self, id):
268 | 		db = self.settings['db']
269 | 
270 | 		try:
271 | 			# get workflow
272 | 			workflow = await db.workflow_get(id)
273 | 
274 | 			# make sure workflow is not already running
275 | 			if workflow['status'] == 'running':
276 | 				self.set_status(400)
277 | 				self.write(message(400, 'Workflow \"%s\" is already running' % id))
278 | 				return
279 | 
280 | 			# copy nextflow.config from input directory if it exists
281 | 			workflow_dir = os.path.join(env.WORKFLOWS_DIR, id)
282 | 			input_dir = os.path.join(workflow_dir, workflow['input_dir'])
283 | 			src = os.path.join(input_dir, 'nextflow.config')
284 | 			dst = os.path.join(workflow_dir, 'nextflow.config')
285 | 
286 | 			if os.path.exists(dst):
287 | 				os.remove(dst)
288 | 
289 | 			if os.path.exists(src):
290 | 				shutil.copyfile(src, dst)
291 | 
292 | 			# append additional settings to nextflow.config
293 | 			with open(dst, 'a') as f:
294 | 				weblog_url = 'http://%s:%d/api/tasks' % (socket.gethostbyname(socket.gethostname()), tornado.options.options.port)
295 | 				f.write('weblog { enabled = true\n url = \"%s\" }\n' % (weblog_url))
296 | 				f.write('k8s { launchDir = \"%s\" }\n' % (workflow_dir))
297 | 
298 | 			# update workflow status
299 | 			workflow['status'] = 'running'
300 | 			workflow['date_submitted'] = int(time.time() * 1000)
301 | 			workflow['attempts'] += 1
302 | 
303 | 			await db.workflow_update(id, workflow)
304 | 
305 | 			# launch workflow as a child process
306 | 			p = mp.Process(target=Workflow.launch, args=(db, workflow, self.resume))
307 | 			p.start()
308 | 
309 | 			self.set_status(200)
310 | 			self.write(message(200, 'Workflow \"%s\" was launched' % id))
311 | 		except:
312 | 			self.set_status(404)
313 | 			self.write(message(404, 'Failed to launch workflow \"%s\"' % id))
314 | 
315 | 
316 | 
317 | class WorkflowResumeHandler(WorkflowLaunchHandler):
318 | 
319 | 	resume = True
320 | 
321 | 
322 | 
323 | class WorkflowCancelHandler(tornado.web.RequestHandler):
324 | 
325 | 	async def post(self, id):
326 | 		db = self.settings['db']
327 | 
328 | 		try:
329 | 			# get workflow
330 | 			workflow = await db.workflow_get(id)
331 | 			workflow = {**{ 'pid': -1 }, **workflow}
332 | 
333 | 			# cancel workflow
334 | 			Workflow.cancel(workflow)
335 | 
336 | 			# update workflow status
337 | 			workflow['status'] = 'failed'
338 | 			workflow['pid'] = -1
339 | 
340 | 			await db.workflow_update(id, workflow)
341 | 
342 | 			self.set_status(200)
343 | 			self.write(message(200, 'Workflow \"%s\" was canceled' % id))
344 | 		except:
345 | 			self.set_status(404)
346 | 			self.write(message(404, 'Failed to cancel workflow \"%s\"' % id))
347 | 
348 | 
349 | 
350 | class WorkflowLogHandler(tornado.web.RequestHandler):
351 | 
352 | 	async def get(self, id):
353 | 		db = self.settings['db']
354 | 
355 | 		try:
356 | 			# get workflow
357 | 			workflow = await db.workflow_get(id)
358 | 
359 | 			# append log if it exists
360 | 			log_file = os.path.join(env.WORKFLOWS_DIR, id, '.workflow.log')
361 | 
362 | 			if os.path.exists(log_file):
363 | 				f = open(log_file)
364 | 				log = ''.join(f.readlines())
365 | 			else:
366 | 				log = ''
367 | 
368 | 			# construct response data
369 | 			data = {
370 | 				'_id': id,
371 | 				'status': workflow['status'],
372 | 				'attempts': workflow['attempts'],
373 | 				'log': log
374 | 			}
375 | 
376 | 			self.set_status(200)
377 | 			self.set_header('content-type', 'application/json')
378 | 			self.set_header('cache-control', 'no-store, no-cache, must-revalidate, max-age=0')
379 | 			self.write(tornado.escape.json_encode(data))
380 | 		except:
381 | 			self.set_status(404)
382 | 			self.write(message(404, 'Failed to fetch log for workflow \"%s\"' % id))
383 | 
384 | 
385 | 
386 | class WorkflowDownloadHandler(tornado.web.StaticFileHandler):
387 | 
388 | 	def parse_url_path(self, id):
389 | 		# provide output file if path is specified, otherwise output data archive
390 | 		filename_default = '%s-output.tar.gz' % id
391 | 		filename = self.get_query_argument('path', filename_default)
392 | 
393 | 		self.set_header('content-disposition', 'attachment; filename=\"%s\"' % filename)
394 | 		return os.path.join(id, filename)
395 | 
396 | 
397 | 
398 | class TaskQueryHandler(tornado.web.RequestHandler):
399 | 
400 | 	async def get(self):
401 | 		page = int(self.get_query_argument('page', 0))
402 | 		page_size = int(self.get_query_argument('page_size', 100))
403 | 
404 | 		db = self.settings['db']
405 | 		tasks = await db.task_query(page, page_size)
406 | 
407 | 		self.set_status(200)
408 | 		self.set_header('content-type', 'application/json')
409 | 		self.write(tornado.escape.json_encode(tasks))
410 | 
411 | 	async def post(self):
412 | 		db = self.settings['db']
413 | 
414 | 		# make sure request body is valid
415 | 		try:
416 | 			task = tornado.escape.json_decode(self.request.body)
417 | 		except json.JSONDecodeError:
418 | 			self.set_status(422)
419 | 			self.write(message(422, 'Ill-formatted JSON'))
420 | 			return
421 | 
422 | 		try:
423 | 			# append id to task
424 | 			task['_id'] = str(bson.ObjectId())
425 | 
426 | 			# extract input features for task
427 | 			if task['event'] == 'process_completed':
428 | 				# load execution log
429 | 				filenames = ['.command.log', '.command.out', '.command.err']
430 | 				filenames = [os.path.join(task['trace']['workdir'], filename) for filename in filenames]
431 | 				files = [open(filename) for filename in filenames if os.path.exists(filename)]
432 | 				lines = [line.strip() for f in files for line in f]
433 | 
434 | 				# parse input features from trace directives
435 | 				PREFIX = '#TRACE'
436 | 				lines = [line[len(PREFIX):] for line in lines if line.startswith(PREFIX)]
437 | 				items = [line.split('=') for line in lines]
438 | 				conditions = {k.strip(): v.strip() for k, v in items}
439 | 
440 | 				# append input features to task trace
441 | 				task['trace'] = {**task['trace'], **conditions}
442 | 
443 | 			# save task
444 | 			await db.task_create(task)
445 | 
446 | 			# update workflow status on completed event
447 | 			if task['event'] == 'completed':
448 | 				# get workflow
449 | 				workflow_id = task['runName'].split('-')[1]
450 | 				workflow = await db.workflow_get(workflow_id)
451 | 
452 | 				# update workflow status
453 | 				success = task['metadata']['workflow']['success']
454 | 				if success:
455 | 					workflow['status'] = 'completed'
456 | 				else:
457 | 					workflow['status'] = 'failed'
458 | 
459 | 				await db.workflow_update(workflow['_id'], workflow)
460 | 
461 | 			self.set_status(200)
462 | 			self.set_header('content-type', 'application/json')
463 | 			self.write(tornado.escape.json_encode({ '_id': task['_id'] }))
464 | 		except:
465 | 			self.set_status(404)
466 | 			self.write(message(404, 'Failed to save task'))
467 | 
468 | 
469 | 
470 | class TaskLogHandler(tornado.web.RequestHandler):
471 | 
472 | 	async def get(self, id):
473 | 		db = self.settings['db']
474 | 
475 | 		try:
476 | 			# get workflow
477 | 			task = await db.task_get(id)
478 | 			workdir = task['trace']['workdir']
479 | 
480 | 			# construct response data
481 | 			data = {
482 | 				'_id': id,
483 | 				'out': '',
484 | 				'err': ''
485 | 			}
486 | 
487 | 			# append log files if they exist
488 | 			out_file = os.path.join(workdir, '.command.out')
489 | 			err_file = os.path.join(workdir, '.command.err')
490 | 
491 | 			if os.path.exists(out_file):
492 | 				f = open(out_file)
493 | 				data['out'] = ''.join(f.readlines())
494 | 
495 | 			if os.path.exists(err_file):
496 | 				f = open(err_file)
497 | 				data['err'] = ''.join(f.readlines())
498 | 
499 | 			self.set_status(200)
500 | 			self.set_header('content-type', 'application/json')
501 | 			self.write(tornado.escape.json_encode(data))
502 | 		except:
503 | 			self.set_status(404)
504 | 			self.write(message(404, 'Failed to fetch log for workflow \"%s\"' % id))
505 | 
506 | 
507 | 
508 | class TaskQueryPipelinesHandler(tornado.web.RequestHandler):
509 | 
510 | 	async def get(self):
511 | 		db = self.settings['db']
512 | 
513 | 		try:
514 | 			# query pipelines from database
515 | 			pipelines = await db.task_query_pipelines()
516 | 
517 | 			self.set_status(200)
518 | 			self.set_header('content-type', 'application/json')
519 | 			self.write(tornado.escape.json_encode(pipelines))
520 | 		except Exception as e:
521 | 			self.set_status(404)
522 | 			self.write(message(404, 'Failed to perform query'))
523 | 			raise e
524 | 
525 | 
526 | 
527 | class TaskQueryPipelineHandler(tornado.web.RequestHandler):
528 | 
529 | 	async def get(self, pipeline):
530 | 		db = self.settings['db']
531 | 
532 | 		try:
533 | 			# query tasks from database
534 | 			pipeline = pipeline.lower()
535 | 			tasks = await db.task_query_pipeline(pipeline)
536 | 			tasks = [task['trace'] for task in tasks]
537 | 
538 | 			# separate tasks into dataframes by process
539 | 			process_names = list(set([task['process'] for task in tasks]))
540 | 			dfs = {}
541 | 
542 | 			for process in process_names:
543 | 				dfs[process] = [task for task in tasks if task['process'] == process]
544 | 
545 | 			self.set_status(200)
546 | 			self.set_header('content-type', 'application/json')
547 | 			self.write(tornado.escape.json_encode(dfs))
548 | 		except Exception as e:
549 | 			self.set_status(404)
550 | 			self.write(message(404, 'Failed to perform query'))
551 | 			raise e
552 | 
553 | 
554 | 
555 | class TaskArchiveHandler(tornado.web.RequestHandler):
556 | 
557 | 	async def get(self, pipeline):
558 | 		db = self.settings['db']
559 | 
560 | 		try:
561 | 			# query tasks from database
562 | 			pipeline = pipeline.lower()
563 | 			tasks = await db.task_query_pipeline(pipeline)
564 | 			tasks = [task['trace'] for task in tasks]
565 | 
566 | 			# separate tasks into dataframes by process
567 | 			process_names = list(set([task['process'] for task in tasks]))
568 | 			dfs = {}
569 | 
570 | 			for process in process_names:
571 | 				dfs[process] = pd.DataFrame([task for task in tasks if task['process'] == process])
572 | 
573 | 			# change to trace directory
574 | 			os.chdir(env.TRACE_DIR)
575 | 
576 | 			# save dataframes to csv files
577 | 			for process in process_names:
578 | 				filename = 'trace.%s.txt' % (process)
579 | 				dfs[process].to_csv(filename, sep='\t', index=False)
580 | 
581 | 			# create zip archive of trace files
582 | 			zipfile = 'trace.%s.zip' % (pipeline.replace('/', '__'))
583 | 			files = ['trace.%s.txt' % (process) for process in process_names]
584 | 
585 | 			subprocess.run(['zip', zipfile] + files, check=True)
586 | 			subprocess.run(['rm', '-f'] + files, check=True)
587 | 
588 | 			# return to working directory
589 | 			os.chdir('..')
590 | 
591 | 			self.set_status(200)
592 | 			self.write(message(200, 'Archive was created'))
593 | 		except Exception as e:
594 | 			self.set_status(404)
595 | 			self.write(message(404, 'Failed to create archive'))
596 | 			raise e
597 | 
598 | 
599 | 
600 | class TaskArchiveDownloadHandler(tornado.web.StaticFileHandler):
601 | 
602 | 	def parse_url_path(self, pipeline):
603 | 		# get filename of trace archive
604 | 		filename = 'trace.%s.zip' % (pipeline.replace('/', '__'))
605 | 
606 | 		self.set_header('content-disposition', 'attachment; filename=\"%s\"' % filename)
607 | 		return filename
608 | 
609 | 
610 | 
611 | class TaskVisualizeHandler(tornado.web.RequestHandler):
612 | 
613 | 	async def post(self):
614 | 		db = self.settings['db']
615 | 
616 | 		try:
617 | 			# parse request body
618 | 			data = tornado.escape.json_decode(self.request.body)
619 | 
620 | 			# query task dataset from database
621 | 			pipeline = data['pipeline'].lower()
622 | 			tasks = await db.task_query_pipeline(pipeline)
623 | 			tasks = [task['trace'] for task in tasks]
624 | 			tasks_process = [task for task in tasks if task['process'] == data['process']]
625 | 
626 | 			df = pd.DataFrame(tasks_process)
627 | 
628 | 			# prepare visualizer args
629 | 			args = data['args']
630 | 			args['plot_name'] = str(bson.ObjectId())
631 | 
632 | 			if args['selectors'] == '':
633 | 				args['selectors'] = []
634 | 			else:
635 | 				args['selectors'] = args['selectors'].split(' ')
636 | 
637 | 			# append columns from merge process if specified
638 | 			if 'merge_process' in args:
639 | 				# load merge data
640 | 				tasks_merge = [task for task in tasks if task['process'] == args['merge_process']]
641 | 				df_merge = pd.DataFrame(tasks_merge)
642 | 
643 | 				# remove duplicate columns
644 | 				dupe_columns = set(df.columns).intersection(df_merge.columns)
645 | 				dupe_columns.remove(args['merge_key'])
646 | 				df_merge.drop(columns=dupe_columns, inplace=True)
647 | 
648 | 				# append merge columns to data
649 | 				df = df.merge(df_merge, on=args['merge_key'], how='left', copy=False)
650 | 
651 | 			# create visualization
652 | 			outfile = Visualizer.visualize(df, args)
653 | 
654 | 			# encode image file into base64
655 | 			with open(outfile, 'rb') as f:
656 | 				image_data = base64.b64encode(f.read()).decode('utf-8')
657 | 
658 | 			self.set_status(200)
659 | 			self.set_header('content-type', 'application/json')
660 | 			self.write(tornado.escape.json_encode(image_data))
661 | 		except Exception as e:
662 | 			self.set_status(404)
663 | 			self.write(message(404, 'Failed to visualize data'))
664 | 			raise e
665 | 
666 | 
667 | 
668 | class TaskEditHandler(tornado.web.RequestHandler):
669 | 
670 | 	async def get(self, id):
671 | 		db = self.settings['db']
672 | 
673 | 		try:
674 | 			task = await db.task_get(id)
675 | 
676 | 			self.set_status(200)
677 | 			self.set_header('content-type', 'application/json')
678 | 			self.write(tornado.escape.json_encode(task))
679 | 		except:
680 | 			self.set_status(404)
681 | 			self.write(message(404, 'Failed to get task \"%s\"' % id))
682 | 
683 | 
684 | 
685 | class ModelTrainHandler(tornado.web.RequestHandler):
686 | 
687 | 	async def post(self):
688 | 		db = self.settings['db']
689 | 
690 | 		try:
691 | 			# parse request body
692 | 			data = tornado.escape.json_decode(self.request.body)
693 | 
694 | 			# query task dataset from database
695 | 			pipeline = data['pipeline'].lower()
696 | 			tasks = await db.task_query_pipeline(pipeline)
697 | 			tasks = [task['trace'] for task in tasks]
698 | 			tasks_process = [task for task in tasks if task['process'] == data['process']]
699 | 
700 | 			df = pd.DataFrame(tasks_process)
701 | 
702 | 			# prepare training args
703 | 			args = data['args']
704 | 			args['hidden_layer_sizes'] = [int(v) for v in args['hidden_layer_sizes'].split(' ')]
705 | 			args['model_name'] = '%s.%s.%s' % (pipeline.replace('/', '__'), data['process'], args['target'])
706 | 
707 | 			if args['selectors'] == '':
708 | 				args['selectors'] = []
709 | 			else:
710 | 				args['selectors'] = args['selectors'].split(' ')
711 | 
712 | 			# append columns from merge process if specified
713 | 			if args['merge_process'] != None:
714 | 				# load merge data
715 | 				tasks_merge = [task for task in tasks if task['process'] == args['merge_process']]
716 | 				df_merge = pd.DataFrame(tasks_merge)
717 | 
718 | 				# remove duplicate columns
719 | 				dupe_columns = set(df.columns).intersection(df_merge.columns)
720 | 				dupe_columns.remove(args['merge_key'])
721 | 				df_merge.drop(columns=dupe_columns, inplace=True)
722 | 
723 | 				# append merge columns to data
724 | 				df = df.merge(df_merge, on=args['merge_key'], how='left', copy=False)
725 | 
726 | 			# train model
727 | 			results = Model.train(df, args)
728 | 
729 | 			# visualize training results
730 | 			df = pd.DataFrame()
731 | 			df['y_true'] = results['y_true']
732 | 			df['y_pred'] = results['y_pred']
733 | 
734 | 			outfile = Visualizer.visualize(df, {
735 | 				'xaxis': 'y_true',
736 | 				'yaxis': 'y_pred',
737 | 				'plot_name': str(bson.ObjectId())
738 | 			})
739 | 
740 | 			# encode image file into base64
741 | 			with open(outfile, 'rb') as f:
742 | 				results['scatterplot'] = base64.b64encode(f.read()).decode('utf-8')
743 | 
744 | 			# remove extra fields from results
745 | 			del results['y_true']
746 | 			del results['y_pred']
747 | 
748 | 			self.set_status(200)
749 | 			self.set_header('content-type', 'application/json')
750 | 			self.write(tornado.escape.json_encode(results))
751 | 		except Exception as e:
752 | 			self.set_status(404)
753 | 			self.write(message(404, 'Failed to train model'))
754 | 			raise e
755 | 
756 | 
757 | 
758 | class ModelConfigHandler(tornado.web.RequestHandler):
759 | 
760 | 	async def get(self):
761 | 		try:
762 | 			# parse request body
763 | 			pipeline = self.get_argument('pipeline', default=None)
764 | 			process = self.get_argument('process', default=None)
765 | 			target = self.get_argument('target', default=None)
766 | 
767 | 			# get model config file
768 | 			filename = '%s/%s.%s.%s.json' % (env.MODELS_DIR, pipeline.lower().replace('/', '__'), process, target)
769 | 
770 | 			with open(filename, 'r') as f:
771 | 				config = json.load(f)
772 | 
773 | 			self.set_status(200)
774 | 			self.set_header('content-type', 'application/json')
775 | 			self.write(tornado.escape.json_encode(config))
776 | 		except Exception as e:
777 | 			self.set_status(404)
778 | 			self.write(message(404, 'Failed to get model config'))
779 | 			raise e
780 | 
781 | 
782 | 
783 | class ModelPredictHandler(tornado.web.RequestHandler):
784 | 
785 | 	async def post(self):
786 | 		try:
787 | 			# parse request body
788 | 			data = tornado.escape.json_decode(self.request.body)
789 | 			data['pipeline'] = data['pipeline'].lower()
790 | 			data['model_name'] = '%s.%s.%s' % (data['pipeline'].replace('/', '__'), data['process'], data['target'])
791 | 
792 | 			# perform model prediction
793 | 			results = Model.predict(data['model_name'], data['inputs'])
794 | 
795 | 			self.set_status(200)
796 | 			self.set_header('content-type', 'application/json')
797 | 			self.write(tornado.escape.json_encode(results))
798 | 		except Exception as e:
799 | 			self.set_status(404)
800 | 			self.write(message(404, 'Failed to perform model prediction'))
801 | 			raise e
802 | 
803 | 
804 | 
805 | if __name__ == '__main__':
806 | 	# parse command-line options
807 | 	tornado.options.define('backend', default='mongo', help='Database backend to use (file or mongo)')
808 | 	tornado.options.define('url-file', default='db.pkl', help='database file for file backend')
809 | 	tornado.options.define('url-mongo', default='localhost', help='mongodb service url for mongo backend')
810 | 	tornado.options.define('np', default=1, help='number of server processes')
811 | 	tornado.options.define('port', default=8080)
812 | 	tornado.options.parse_command_line()
813 | 
814 | 	# initialize auxiliary directories
815 | 	os.makedirs(env.MODELS_DIR, exist_ok=True)
816 | 	os.makedirs(env.TRACE_DIR, exist_ok=True)
817 | 	os.makedirs(env.WORKFLOWS_DIR, exist_ok=True)
818 | 
819 | 	# initialize api endpoints
820 | 	app = tornado.web.Application([
821 | 		(r'/api/workflows', WorkflowQueryHandler),
822 | 		(r'/api/workflows/0', WorkflowCreateHandler),
823 | 		(r'/api/workflows/([a-zA-Z0-9-]+)', WorkflowEditHandler),
824 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/upload', WorkflowUploadHandler),
825 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/launch', WorkflowLaunchHandler),
826 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/resume', WorkflowResumeHandler),
827 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/cancel', WorkflowCancelHandler),
828 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/log', WorkflowLogHandler),
829 | 		(r'/api/workflows/([a-zA-Z0-9-]+)/download', WorkflowDownloadHandler, dict(path=env.WORKFLOWS_DIR)),
830 | 		(r'/api/tasks', TaskQueryHandler),
831 | 		(r'/api/tasks/([a-zA-Z0-9-]+)/log', TaskLogHandler),
832 | 		(r'/api/tasks/pipelines', TaskQueryPipelinesHandler),
833 | 		(r'/api/tasks/pipelines/(.+)', TaskQueryPipelineHandler),
834 | 		(r'/api/tasks/archive/(.+)/download', TaskArchiveDownloadHandler, dict(path=env.TRACE_DIR)),
835 | 		(r'/api/tasks/archive/(.+)', TaskArchiveHandler),
836 | 		(r'/api/tasks/visualize', TaskVisualizeHandler),
837 | 		(r'/api/tasks/([a-zA-Z0-9-]+)', TaskEditHandler),
838 | 		(r'/api/model/train', ModelTrainHandler),
839 | 		(r'/api/model/config', ModelConfigHandler),
840 | 		(r'/api/model/predict', ModelPredictHandler),
841 | 		(r'/(.*)', tornado.web.StaticFileHandler, dict(path='./client', default_filename='index.html'))
842 | 	])
843 | 
844 | 	try:
845 | 		# spawn server processes
846 | 		server = tornado.httpserver.HTTPServer(app, max_buffer_size=1024 ** 3)
847 | 		server.bind(tornado.options.options.port)
848 | 		server.start(tornado.options.options.np)
849 | 
850 | 		# connect to database
851 | 		if tornado.options.options.backend == 'file':
852 | 			app.settings['db'] = backend.FileBackend(tornado.options.options.url_file)
853 | 
854 | 		elif tornado.options.options.backend == 'mongo':
855 | 			app.settings['db'] = backend.MongoBackend(tornado.options.options.url_mongo)
856 | 
857 | 		else:
858 | 			raise KeyError('Backend must be either \'json\' or \'mongo\'')
859 | 
860 | 		# start the event loop
861 | 		print('The API is listening on http://0.0.0.0:%d' % (tornado.options.options.port), flush=True)
862 | 		tornado.ioloop.IOLoop.current().start()
863 | 
864 | 	except KeyboardInterrupt:
865 | 		tornado.ioloop.IOLoop.current().stop()
866 | 


--------------------------------------------------------------------------------
/bin/visualizer.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import pandas as pd
  4 | import seaborn as sns
  5 | 
  6 | 
  7 | 
  8 | def select_rows_by_values(df, column, values):
  9 | 	return pd.concat([df[df[column].astype(str) == v] for v in values])
 10 | 
 11 | 
 12 | 
 13 | def is_continuous(df, column):
 14 | 	return column != None and df[column].dtype.kind in 'biufcmM'
 15 | 
 16 | 
 17 | 
 18 | def is_discrete(df, column):
 19 | 	return column != None and df[column].dtype.kind in 'OSUV'
 20 | 
 21 | 
 22 | 
 23 | def contingency_table(x, y, data, **kwargs):
 24 | 	# compute indices for categorical variables
 25 | 	x_values = sorted(list(set(x)))
 26 | 	y_values = sorted(list(set(y)))
 27 | 	x_idx = [x_values.index(x_i) for x_i in x]
 28 | 	y_idx = [y_values.index(y_i) for y_i in y]
 29 | 
 30 | 	# create contingency table
 31 | 	ct = pd.DataFrame(
 32 | 		np.zeros((len(y_values), len(x_values))),
 33 | 		index=y_values,
 34 | 		columns=x_values,
 35 | 		dtype=np.int32)
 36 | 
 37 | 	for x_i, y_i in zip(x_idx, y_idx):
 38 | 		ct.iloc[y_i, x_i] += 1
 39 | 
 40 | 	# plot contingency table
 41 | 	sns.heatmap(ct, annot=True, fmt='d', cbar=False, square=True, **kwargs)
 42 | 
 43 | 
 44 | 
 45 | def visualize(data, args):
 46 | 	defaults = {
 47 | 		'plot_type': None,
 48 | 		'yaxis': None,
 49 | 		'row': None,
 50 | 		'col': None,
 51 | 		'hue': None,
 52 | 		'selectors': [],
 53 | 		'sharex': False,
 54 | 		'sharey': False,
 55 | 		'height': 3,
 56 | 		'aspect': 1,
 57 | 		'color': None,
 58 | 		'palette': None,
 59 | 		'xscale': None,
 60 | 		'yscale': None,
 61 | 		'rotate_xticklabels': False,
 62 | 		'rotate_yticklabels': False
 63 | 	}
 64 | 
 65 | 	args = {**defaults, **args}
 66 | 
 67 | 	# prepare axis columns in dataframe
 68 | 	axes = [
 69 | 		args['xaxis'],
 70 | 		args['yaxis'],
 71 | 		args['row'],
 72 | 		args['col'],
 73 | 		args['hue']
 74 | 	]
 75 | 
 76 | 	for column in axes:
 77 | 		# skip columns which were not specified
 78 | 		if column == None:
 79 | 			continue
 80 | 
 81 | 		# remove rows which have missing values in column
 82 | 		data = data[~data[column].isna()]
 83 | 
 84 | 	# apply selectorss to dataframe
 85 | 	for selector in args['selectors']:
 86 | 		# parse column and selected values
 87 | 		column, values = selector.split('=')
 88 | 		values = values.split(',')
 89 | 
 90 | 		# select rows from dataframe
 91 | 		if values != None and len(values) > 0:
 92 | 			data = select_rows_by_values(data, column, values)
 93 | 
 94 | 	if len(data.index) == 0:
 95 | 		raise RuntimeError('error: no data to visualize')
 96 | 
 97 | 	# sort data by row, col, and hue values
 98 | 	if args['row'] != None:
 99 | 		data.sort_values(by=args['row'], inplace=True, kind='mergesort')
100 | 
101 | 	if args['col'] != None:
102 | 		data.sort_values(by=args['col'], inplace=True, kind='mergesort')
103 | 
104 | 	if args['hue'] != None:
105 | 		data.sort_values(by=args['hue'], inplace=True, kind='mergesort')
106 | 
107 | 	# create a facet grid for plotting
108 | 	g = sns.FacetGrid(
109 | 		data,
110 | 		row=args['row'],
111 | 		col=args['col'],
112 | 		sharex=args['sharex'],
113 | 		sharey=args['sharey'],
114 | 		height=args['height'],
115 | 		aspect=args['aspect'],
116 | 		margin_titles=True)
117 | 
118 | 	# determine plot type if not specified
119 | 	if args['plot_type'] == None:
120 | 		# if x is continuous, use histogram
121 | 		if is_continuous(data, args['xaxis']) and args['yaxis'] == None:
122 | 			args['plot_type'] = 'hist'
123 | 
124 | 		# if x is discrete, use count plot
125 | 		elif is_discrete(data, args['xaxis']) and args['yaxis'] == None:
126 | 			args['plot_type'] = 'count'
127 | 
128 | 		# if x and y are continuous, use scatter plot
129 | 		elif is_continuous(data, args['xaxis']) and is_continuous(data, args['yaxis']):
130 | 			args['plot_type'] = 'scatter'
131 | 
132 | 		# if x and y are discrete, use contingency table
133 | 		elif is_discrete(data, args['xaxis']) and is_discrete(data, args['yaxis']):
134 | 			args['plot_type'] = 'ct'
135 | 
136 | 		# if x is discrete and y is continuous, use bar plot
137 | 		elif is_discrete(data, args['xaxis']) and is_continuous(data, args['yaxis']):
138 | 			args['plot_type'] = 'bar'
139 | 
140 | 		# otherwise throw an error
141 | 		else:
142 | 			raise RuntimeError('error: could not find a plotting method for the given axes')
143 | 
144 | 	# create order of x values for discrete plots
145 | 	# unless y-axis sorting is enabled (so as not to override it)
146 | 	if is_discrete(data, args['xaxis']):
147 | 		x_values = sorted(list(set(data[args['xaxis']])))
148 | 	else:
149 | 		x_values = None
150 | 
151 | 	# create plot
152 | 	if args['plot_type'] == 'hist':
153 | 		g.map(
154 | 			sns.histplot,
155 | 			args['xaxis'],
156 | 			color=args['color'])
157 | 
158 | 	elif args['plot_type'] == 'count':
159 | 		g.map(
160 | 			sns.countplot,
161 | 			args['xaxis'],
162 | 			hue=args['hue'],
163 | 			color=args['color'],
164 | 			palette=args['palette'])
165 | 
166 | 	elif args['plot_type'] == 'scatter':
167 | 		g = g.map(
168 | 			sns.scatterplot,
169 | 			args['xaxis'],
170 | 			args['yaxis'],
171 | 			hue=args['hue'],
172 | 			data=data,
173 | 			color=args['color'])
174 | 
175 | 		if args['hue'] != None:
176 | 			g.add_legend()
177 | 
178 | 	elif args['plot_type'] == 'ct':
179 | 		g = g.map(
180 | 			contingency_table,
181 | 			args['xaxis'],
182 | 			args['yaxis'],
183 | 			data=data,
184 | 			color=args['color'])
185 | 
186 | 	elif args['plot_type'] == 'bar':
187 | 		g = g.map(
188 | 			sns.barplot,
189 | 			args['xaxis'],
190 | 			args['yaxis'],
191 | 			hue=args['hue'],
192 | 			data=data,
193 | 			ci=68,
194 | 			color=args['color'],
195 | 			palette=args['palette'],
196 | 			order=x_values)
197 | 
198 | 		if args['hue'] != None:
199 | 			g.add_legend()
200 | 
201 | 	elif args['plot_type'] == 'point':
202 | 		g = g.map(
203 | 			sns.pointplot,
204 | 			args['xaxis'],
205 | 			args['yaxis'],
206 | 			hue=args['hue'],
207 | 			data=data,
208 | 			ci=68,
209 | 			capsize=0.1,
210 | 			color=args['color'],
211 | 			palette=args['palette'],
212 | 			markers='x',
213 | 			linestyles='--',
214 | 			order=x_values)
215 | 
216 | 		if args['hue'] != None:
217 | 			g.add_legend()
218 | 
219 | 	# set x-axis scale if specified
220 | 	if args['xscale'] != None:
221 | 		g.set(xscale=args['xscale'])
222 | 
223 | 	# set y-axis scale if specified
224 | 	if args['yscale'] != None:
225 | 		g.set(yscale=args['yscale'])
226 | 
227 | 	# rotate x-axis tick labels if specified
228 | 	if args['rotate_xticklabels']:
229 | 		plt.xticks(rotation=45)
230 | 
231 | 	# rotate y-axis tick labels if specified
232 | 	if args['rotate_yticklabels']:
233 | 		plt.yticks(rotation=45)
234 | 
235 | 	# disable x-axis ticks if there are too many categories
236 | 	if is_discrete(data, args['xaxis']) and len(set(data[args['xaxis']])) >= 100:
237 | 		plt.xticks([])
238 | 
239 | 	# save output figure
240 | 	outfile = '/tmp/%s.png' % (args['plot_name'])
241 | 	plt.savefig(outfile)
242 | 	plt.close()
243 | 
244 | 	return outfile


--------------------------------------------------------------------------------
/bin/workflow.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import asyncio
  4 | import os
  5 | import signal
  6 | import subprocess
  7 | 
  8 | import env
  9 | 
 10 | 
 11 | 
 12 | def get_run_name(workflow):
 13 | 	return 'workflow-%s-%04d' % (workflow['_id'], workflow['attempts'])
 14 | 
 15 | 
 16 | 
 17 | def run_workflow(workflow, work_dir, resume):
 18 | 	# save current directory
 19 | 	prev_dir = os.getcwd()
 20 | 
 21 | 	# change to workflow directory
 22 | 	os.chdir(work_dir)
 23 | 
 24 | 	# prepare command line arguments
 25 | 	run_name = get_run_name(workflow)
 26 | 
 27 | 	if env.NXF_EXECUTOR == 'k8s':
 28 | 		args = [
 29 | 			'nextflow',
 30 | 			'-log', os.path.join(workflow['output_dir'], 'nextflow.log'),
 31 | 			'kuberun',
 32 | 			workflow['pipeline'],
 33 | 			'-ansi-log', 'false',
 34 | 			'-latest',
 35 | 			'-name', run_name,
 36 | 			'-profile', workflow['profiles'],
 37 | 			'-revision', workflow['revision'],
 38 | 			'-volume-mount', env.PVC_NAME
 39 | 		]
 40 | 
 41 | 	elif env.NXF_EXECUTOR == 'local':
 42 | 		args = [
 43 | 			'nextflow',
 44 | 			'-log', os.path.join(workflow['output_dir'], 'nextflow.log'),
 45 | 			'run',
 46 | 			workflow['pipeline'],
 47 | 			'-ansi-log', 'false',
 48 | 			'-latest',
 49 | 			'-name', run_name,
 50 | 			'-profile', workflow['profiles'],
 51 | 			'-revision', workflow['revision']
 52 | 		]
 53 | 
 54 | 	elif env.NXF_EXECUTOR == 'pbspro':
 55 | 		args = [
 56 | 			'nextflow',
 57 | 			'-log', os.path.join(workflow['output_dir'], 'nextflow.log'),
 58 | 			'run',
 59 | 			workflow['pipeline'],
 60 | 			'-ansi-log', 'false',
 61 | 			'-latest',
 62 | 			'-name', run_name,
 63 | 			'-profile', workflow['profiles'],
 64 | 			'-revision', workflow['revision']
 65 | 		]
 66 | 
 67 | 	# add params file if specified
 68 | 	if workflow['params_format'] and workflow['params_data']:
 69 | 		params_filename = 'params.%s' % (workflow['params_format'])
 70 | 		params_file = open(params_filename, 'w')
 71 | 		params_file.write(workflow['params_data'])
 72 | 		params_file.close()
 73 | 
 74 | 		args += ['-params-file', params_filename]
 75 | 
 76 | 	# add resume option if specified
 77 | 	if resume:
 78 | 		args += ['-resume']
 79 | 
 80 | 	# launch workflow asynchronously
 81 | 	proc = subprocess.Popen(
 82 | 		args,
 83 | 		stdout=open('.workflow.log', 'w'),
 84 | 		stderr=subprocess.STDOUT
 85 | 	)
 86 | 
 87 | 	# return to original directory
 88 | 	os.chdir(prev_dir)
 89 | 
 90 | 	return proc
 91 | 
 92 | 
 93 | 
 94 | def save_output(workflow, output_dir):
 95 | 	return subprocess.Popen(
 96 | 		['scripts/kube-save.sh', workflow['_id'], output_dir],
 97 | 		stdout=subprocess.PIPE,
 98 | 		stderr=subprocess.STDOUT
 99 | 	)
100 | 
101 | 
102 | 
103 | async def set_property(db, workflow, key, value):
104 | 	workflow[key] = value
105 | 	await db.workflow_update(workflow['_id'], workflow)
106 | 
107 | 
108 | 
109 | async def launch_async(db, workflow, resume):
110 | 	# re-initialize database backend
111 | 	db.initialize()
112 | 
113 | 	# start workflow
114 | 	work_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id'])
115 | 	proc = run_workflow(workflow, work_dir, resume)
116 | 	proc_pid = proc.pid
117 | 
118 | 	print('%d: saving workflow pid...' % (proc_pid))
119 | 
120 | 	# save workflow pid
121 | 	await set_property(db, workflow, 'pid', proc.pid)
122 | 
123 | 	print('%d: waiting for workflow to finish...' % (proc_pid))
124 | 
125 | 	# wait for workflow to complete
126 | 	if proc.wait() == 0:
127 | 		print('%d: workflow completed' % (proc_pid))
128 | 		await set_property(db, workflow, 'status', 'completed')
129 | 	else:
130 | 		print('%d: workflow failed' % (proc_pid))
131 | 		await set_property(db, workflow, 'status', 'failed')
132 | 		return
133 | 
134 | 	print('%d: saving output data...' % (proc_pid))
135 | 
136 | 	# save output data
137 | 	output_dir = os.path.join(env.WORKFLOWS_DIR, workflow['_id'], workflow['output_dir'])
138 | 	proc = save_output(workflow, output_dir)
139 | 
140 | 	proc_out, _ = proc.communicate()
141 | 	print(proc_out.decode('utf-8'))
142 | 
143 | 	if proc.wait() == 0:
144 | 		print('%d: save output data completed' % (proc_pid))
145 | 	else:
146 | 		print('%d: save output data failed' % (proc_pid))
147 | 
148 | 
149 | 
150 | def launch(db, workflow, resume):
151 | 	asyncio.run(launch_async(db, workflow, resume))
152 | 
153 | 
154 | 
155 | def cancel(workflow):
156 | 	# terminate child process
157 | 	if workflow['pid'] != -1:
158 | 		try:
159 | 			os.kill(workflow['pid'], signal.SIGINT)
160 | 		except ProcessLookupError:
161 | 			pass
162 | 
163 | 	# delete pods if relevant
164 | 	if env.NXF_EXECUTOR == 'k8s':
165 | 		proc = subprocess.Popen(
166 | 			['scripts/kube-cancel.sh', get_run_name(workflow)],
167 | 			stdout=subprocess.PIPE,
168 | 			stderr=subprocess.STDOUT
169 | 		)
170 | 		proc_out, _ = proc.communicate()
171 | 		print(proc_out.decode('utf-8'))
172 | 


--------------------------------------------------------------------------------
/cli/cancel.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Cancel a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # cancel a workflow instance
14 | curl -s -X POST ${URL}/api/workflows/${ID}/cancel
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/create.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Create a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <pipeline>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | PIPELINE="$2"
12 | 
13 | # create a workflow instance
14 | curl -s \
15 | 	-X POST \
16 | 	-d "{\"pipeline\":\"${PIPELINE}\"}" \
17 | 	${URL}/api/workflows/0
18 | 
19 | echo
20 | 


--------------------------------------------------------------------------------
/cli/delete.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Delete a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # delete a workflow instance
14 | curl -s -X DELETE ${URL}/api/workflows/${ID}
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/download.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download output data for a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # download output data for a workflow instance
14 | curl -s -o "${ID}-output.tar.gz" ${URL}/api/workflows/${ID}/download
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/get.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Get a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # get a workflow instance
14 | curl -s -X GET ${URL}/api/workflows/${ID}
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/launch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Launch a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # launch a workflow instance
14 | curl -s -X POST ${URL}/api/workflows/${ID}/launch
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Get the log of a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <url> <id>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | 
13 | # get the log of a workflow instance
14 | curl -s -X GET ${URL}/api/workflows/${ID}/log
15 | 
16 | echo
17 | 


--------------------------------------------------------------------------------
/cli/query.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # List all workflow instances on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 1 ]]; then
 6 | 	echo "usage: $0 <url>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | 
12 | # list all workflow instances
13 | curl -s -X GET ${URL}/api/workflows
14 | 
15 | echo
16 | 


--------------------------------------------------------------------------------
/cli/upload.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Upload input data for a workflow instance on a nextflow server.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 3 ]]; then
 6 | 	echo "usage: $0 <url> <id> <filename>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | URL="$1"
11 | ID="$2"
12 | FILENAME="$3"
13 | 
14 | # upload data to a workflow instance
15 | curl -s \
16 | 	-F "filename=$(basename FILENAME)" \
17 | 	-F "body=@${FILENAME}" \
18 | 	${URL}/api/workflows/${ID}/upload
19 | 
20 | echo
21 | 


--------------------------------------------------------------------------------
/client/app.js:
--------------------------------------------------------------------------------
  1 | 'use strict'
  2 | 
  3 | const app = angular.module('app', [
  4 | 	'ngRoute',
  5 | 	'angularFileUpload'
  6 | ])
  7 | 
  8 | 
  9 | 
 10 | app.config(['$compileProvider', function($compileProvider) {
 11 | 	$compileProvider.debugInfoEnabled(false)
 12 | }])
 13 | 
 14 | 
 15 | 
 16 | app.config(['$routeProvider', function($routeProvider) {
 17 | 	$routeProvider
 18 | 		.when('/', { redirectTo: '/workflows' })
 19 | 		.when('/workflows', {
 20 | 			templateUrl: 'views/workflows.html',
 21 | 			controller: 'WorkflowsCtrl'
 22 | 		})
 23 | 		.when('/workflows/:id', {
 24 | 			templateUrl: 'views/workflow.html',
 25 | 			controller: 'WorkflowCtrl'
 26 | 		})
 27 | 		.when('/tasks', {
 28 | 			templateUrl: 'views/tasks.html',
 29 | 			controller: 'TasksCtrl'
 30 | 		})
 31 | 		.when('/tasks/:id', {
 32 | 			templateUrl: 'views/task.html',
 33 | 			controller: 'TaskCtrl'
 34 | 		})
 35 | 		.when('/visualizer', {
 36 | 			templateUrl: 'views/visualizer.html',
 37 | 			controller: 'VisualizerCtrl'
 38 | 		})
 39 | 		.when('/model', {
 40 | 			templateUrl: 'views/model.html',
 41 | 			controller: 'ModelCtrl'
 42 | 		})
 43 | 		.otherwise('/')
 44 | }])
 45 | 
 46 | 
 47 | 
 48 | app.service('alert', ['$interval', function($interval) {
 49 | 	this.alerts = []
 50 | 
 51 | 	const self = this
 52 | 	let count = 0
 53 | 
 54 | 	const addAlert = function(type, header, message) {
 55 | 		let id = count
 56 | 		let promise = $interval(function() {
 57 | 			let index = self.alerts.findIndex(function(alert) {
 58 | 				return (alert.id === id)
 59 | 			})
 60 | 
 61 | 			self.alerts.splice(index, 1)
 62 | 		}, 10000, 1)
 63 | 
 64 | 		self.alerts.push({
 65 | 			id: id,
 66 | 			type: type,
 67 | 			header: header,
 68 | 			message: message,
 69 | 			promise: promise
 70 | 		})
 71 | 		count++
 72 | 	}
 73 | 
 74 | 	this.success = function(message) {
 75 | 		addAlert('success', null, message)
 76 | 	}
 77 | 
 78 | 	this.info = function(message) {
 79 | 		addAlert('info', null, message)
 80 | 	}
 81 | 
 82 | 	this.warning = function(message) {
 83 | 		addAlert('warning', null, message)
 84 | 	}
 85 | 
 86 | 	this.error = function(message) {
 87 | 		addAlert('danger', 'Error: ', message)
 88 | 	}
 89 | 
 90 | 	this.remove = function(index) {
 91 | 		$interval.cancel(self.alerts[index].promise)
 92 | 
 93 | 		self.alerts.splice(index, 1)
 94 | 	}
 95 | }])
 96 | 
 97 | 
 98 | 
 99 | app.service('api', ['$http', '$q', function($http, $q) {
100 | 	function httpRequest(method, url, params, data) {
101 | 		return $http({
102 | 			method: method,
103 | 			url: window.location.pathname + url,
104 | 			params: params,
105 | 			data: data
106 | 		}).then(function(res) {
107 | 			return res.data
108 | 		}, function(res) {
109 | 			return $q.reject(res.data)
110 | 		})
111 | 	}
112 | 
113 | 	this.Workflow = {}
114 | 
115 | 	this.Workflow.query = function(page) {
116 | 		return httpRequest('get', 'api/workflows', { page: page })
117 | 	}
118 | 
119 | 	this.Workflow.get = function(id) {
120 | 		return httpRequest('get', `api/workflows/${id}`)
121 | 	}
122 | 
123 | 	this.Workflow.save = function(workflow) {
124 | 		return httpRequest('post', `api/workflows/${workflow._id}`, null, workflow)
125 | 	}
126 | 
127 | 	this.Workflow.launch = function(id) {
128 | 		return httpRequest('post', `api/workflows/${id}/launch`)
129 | 	}
130 | 
131 | 	this.Workflow.resume = function(id) {
132 | 		return httpRequest('post', `api/workflows/${id}/resume`)
133 | 	}
134 | 
135 | 	this.Workflow.cancel = function(id) {
136 | 		return httpRequest('post', `api/workflows/${id}/cancel`)
137 | 	}
138 | 
139 | 	this.Workflow.log = function(id) {
140 | 		return httpRequest('get', `api/workflows/${id}/log`)
141 | 	}
142 | 
143 | 	this.Workflow.remove = function(id) {
144 | 		return httpRequest('delete', `api/workflows/${id}`)
145 | 	}
146 | 
147 | 	this.Task = {}
148 | 
149 | 	this.Task.query = function(page) {
150 | 		return httpRequest('get', 'api/tasks', { page: page })
151 | 	}
152 | 
153 | 	this.Task.query_pipelines = function() {
154 | 		return httpRequest('get', `api/tasks/pipelines`)
155 | 	}
156 | 
157 | 	this.Task.query_pipeline = function(pipeline) {
158 | 		return httpRequest('get', `api/tasks/pipelines/${pipeline}`)
159 | 	}
160 | 
161 | 	this.Task.archive = function(pipeline) {
162 | 		return httpRequest('get', `api/tasks/archive/${pipeline}`)
163 | 	}
164 | 
165 | 	this.Task.get = function(id) {
166 | 		return httpRequest('get', `api/tasks/${id}`)
167 | 	}
168 | 
169 | 	this.Task.log = function(id) {
170 | 		return httpRequest('get', `api/tasks/${id}/log`)
171 | 	}
172 | 
173 | 	this.Task.visualize = function(pipeline, process, args) {
174 | 		return httpRequest('post', `api/tasks/visualize`, null, {
175 | 			pipeline,
176 | 			process,
177 | 			args
178 | 		})
179 | 	}
180 | 
181 | 	this.Model = {}
182 | 
183 | 	this.Model.train = function(pipeline, process, args) {
184 | 		return httpRequest('post', `api/model/train`, null, {
185 | 			pipeline,
186 | 			process,
187 | 			args
188 | 		})
189 | 	}
190 | 
191 | 	this.Model.get_config = function(pipeline, process, target) {
192 | 		return httpRequest('get', `api/model/config`, {
193 | 			pipeline,
194 | 			process,
195 | 			target
196 | 		})
197 | 	}
198 | 
199 | 	this.Model.predict = function(pipeline, process, target, inputs) {
200 | 		return httpRequest('post', `api/model/predict`, null, {
201 | 			pipeline,
202 | 			process,
203 | 			target,
204 | 			inputs
205 | 		})
206 | 	}
207 | }])
208 | 
209 | 
210 | 
211 | app.controller('MainCtrl', ['$scope', 'alert', function($scope, alert) {
212 | 	$scope.alert = alert
213 | }])
214 | 
215 | 
216 | 
217 | const STATUS_COLORS = {
218 | 	'nascent': 'primary',
219 | 	'running': 'warning',
220 | 	'completed': 'success',
221 | 	'failed': 'danger'
222 | }
223 | 
224 | 
225 | 
226 | app.controller('WorkflowsCtrl', ['$scope', '$route', 'alert', 'api', function($scope, $route, alert, api) {
227 | 	$scope.STATUS_COLORS = STATUS_COLORS
228 | 	$scope.page = 0
229 | 	$scope.workflows = []
230 | 
231 | 	$scope.query = function(page) {
232 | 		api.Workflow.query(page)
233 | 			.then(function(workflows) {
234 | 				$scope.page = page
235 | 				$scope.workflows = workflows
236 | 			}, function() {
237 | 				alert.error('Failed to query workflow instances.')
238 | 			})
239 | 	}
240 | 
241 | 	$scope.delete = function(w) {
242 | 		if ( !confirm(`Are you sure you want to delete \"${w._id}\"?`) ) {
243 | 			return
244 | 		}
245 | 
246 | 		api.Workflow.remove(w._id)
247 | 			.then(function() {
248 | 				alert.success('Workflow instance deleted.')
249 | 				$route.reload()
250 | 			}, function() {
251 | 				alert.error('Failed to delete workflow instance.')
252 | 			})
253 | 	}
254 | 
255 | 	// initialize
256 | 	$scope.query(0)
257 | }])
258 | 
259 | 
260 | 
261 | app.controller('WorkflowCtrl', ['$scope', '$interval', '$route', 'alert', 'api', 'FileUploader', function($scope, $interval, $route, alert, api, FileUploader) {
262 | 	$scope.STATUS_COLORS = STATUS_COLORS
263 | 	$scope.workflow = {}
264 | 
265 | 	$scope.uploader = new FileUploader({
266 | 		 url: `${window.location.pathname}api/workflows/${$route.current.params.id}/upload`
267 | 	})
268 | 
269 | 	$scope.uploader.onCompleteAll = function() {
270 | 		alert.success('All input files uploaded.')
271 | 		$scope.uploading = false
272 | 		$route.reload()
273 | 	}
274 | 
275 | 	$scope.uploader.onErrorItem = function() {
276 | 		alert.error('Failed to upload input files.')
277 | 		$scope.uploading = false
278 | 	}
279 | 
280 | 	$scope.save = function(workflow) {
281 | 		api.Workflow.save(workflow)
282 | 			.then(function(res) {
283 | 				alert.success('Workflow instance saved.')
284 | 				$route.updateParams({ id: res._id })
285 | 			}, function() {
286 | 				alert.error('Failed to save workflow instance.')
287 | 			})
288 | 	}
289 | 
290 | 	$scope.upload = function() {
291 | 		$scope.uploading = true
292 | 		$scope.uploader.uploadAll()
293 | 	}
294 | 
295 | 	$scope.launch = function(id) {
296 | 		$scope.launching = true
297 | 
298 | 		api.Workflow.launch(id)
299 | 			.then(function() {
300 | 				alert.success('Workflow instance launched.')
301 | 				$scope.workflow.status = ''
302 | 				$scope.workflow.log = ''
303 | 				$scope.launching = false
304 | 				$scope.fetchLog()
305 | 			}, function() {
306 | 				alert.error('Failed to launch workflow instance.')
307 | 				$scope.launching = false
308 | 			})
309 | 	}
310 | 
311 | 	$scope.resume = function(id) {
312 | 		$scope.resuming = true
313 | 
314 | 		api.Workflow.resume(id)
315 | 			.then(function() {
316 | 				alert.success('Workflow instance resumed.')
317 | 				$scope.workflow.status = ''
318 | 				$scope.workflow.log = ''
319 | 				$scope.resuming = false
320 | 				$scope.fetchLog()
321 | 			}, function() {
322 | 				alert.error('Failed to resume workflow instance.')
323 | 				$scope.resuming = false
324 | 			})
325 | 	}
326 | 
327 | 	$scope.cancel = function(id) {
328 | 		$scope.cancelling = true
329 | 
330 | 		api.Workflow.cancel(id)
331 | 			.then(function() {
332 | 				alert.success('Workflow instance canceled.')
333 | 				$scope.cancelling = false
334 | 				$route.reload()
335 | 			}, function() {
336 | 				alert.error('Failed to cancel workflow instance.')
337 | 				$scope.cancelling = false
338 | 			})
339 | 	}
340 | 
341 | 	$scope.fetchLog = function() {
342 | 		if ( $scope.intervalPromise ) {
343 | 			return
344 | 		}
345 | 
346 | 		$scope.intervalPromise = $interval(function() {
347 | 			api.Workflow.log($scope.workflow._id)
348 | 				.then(function(res) {
349 | 					Object.assign($scope.workflow, res)
350 | 
351 | 					if ( res.status !== 'running' ) {
352 | 						$interval.cancel($scope.intervalPromise)
353 | 						$scope.intervalPromise = undefined
354 | 					}
355 | 				})
356 | 		}, 2000, -1)
357 | 	}
358 | 
359 | 	$scope.$on('$destroy', function() {
360 | 		if ( angular.isDefined($scope.intervalPromise) ) {
361 | 			$interval.cancel($scope.intervalPromise)
362 | 		}
363 | 	})
364 | 
365 | 	// initialize
366 | 	api.Workflow.get($route.current.params.id)
367 | 		.then(function(workflow) {
368 | 			$scope.workflow = workflow
369 | 
370 | 			if ( $scope.workflow._id !== '0' ) {
371 | 				$scope.fetchLog()
372 | 			}
373 | 		}, function() {
374 | 			alert.error('Failed to load workflow.')
375 | 		})
376 | }])
377 | 
378 | 
379 | 
380 | app.controller('TasksCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) {
381 | 	$scope.page = 0
382 | 	$scope.tasks = []
383 | 
384 | 	$scope.query_pipelines = function() {
385 | 		api.Task.query_pipelines()
386 | 			.then(function(pipelines) {
387 | 				$scope.pipelines = pipelines
388 | 			}, function() {
389 | 				alert.error('Failed to query pipelines.')
390 | 			})
391 | 	}
392 | 
393 | 	$scope.query_tasks = function(page) {
394 | 		api.Task.query(page)
395 | 			.then(function(tasks) {
396 | 				$scope.page = page
397 | 				$scope.tasks = tasks
398 | 			}, function() {
399 | 				alert.error('Failed to query tasks.')
400 | 			})
401 | 	}
402 | 
403 | 	$scope.archive = function(pipeline) {
404 | 		$scope.archiving = true
405 | 
406 | 		api.Task.archive(pipeline)
407 | 			.then(function() {
408 | 				$scope.archiving = false
409 | 				$scope.archive_success = true
410 | 
411 | 				alert.success('Archive was created.')
412 | 			}, function() {
413 | 				$scope.archiving = false
414 | 				$scope.archive_success = false
415 | 
416 | 				alert.error('Failed to create archive.')
417 | 			})
418 | 	}
419 | 
420 | 	// initialize
421 | 	$scope.query_pipelines()
422 | 	$scope.query_tasks(0)
423 | }])
424 | 
425 | 
426 | 
427 | app.controller('TaskCtrl', ['$scope', '$route', 'alert', 'api', function($scope, $route, alert, api) {
428 | 	$scope.task = {}
429 | 	$scope.task_out = ''
430 | 	$scope.task_err = ''
431 | 
432 | 	$scope.fetchLog = function() {
433 | 		api.Task.log($route.current.params.id)
434 | 			.then(function(res) {
435 | 				$scope.task_out = res.out
436 | 				$scope.task_err = res.err
437 | 			}, function() {
438 | 				alert.error('Failed to fetch task logs.')
439 | 			})
440 | 	}
441 | 
442 | 	// initialize
443 | 	api.Task.get($route.current.params.id)
444 | 		.then(function(task) {
445 | 			$scope.task = task
446 | 		}, function() {
447 | 			alert.error('Failed to load task.')
448 | 		})
449 | }])
450 | 
451 | 
452 | 
453 | app.controller('VisualizerCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) {
454 | 	$scope.args = {
455 | 		selectors: 'exit=0',
456 | 		height: 3,
457 | 		aspect: 1
458 | 	}
459 | 	$scope.columns = []
460 | 	$scope.merge_columns = []
461 | 
462 | 	$scope.query_pipelines = function() {
463 | 		api.Task.query_pipelines()
464 | 			.then(function(pipelines) {
465 | 				$scope.pipelines = pipelines
466 | 			}, function() {
467 | 				alert.error('Failed to query pipelines.')
468 | 			})
469 | 	}
470 | 
471 | 	$scope.query_dataset = function(pipeline) {
472 | 		$scope.querying = true
473 | 
474 | 		api.Task.query_pipeline(pipeline)
475 | 			.then(function(data) {
476 | 				let process_names = Object.keys(data)
477 | 				let process_columns = process_names.reduce((prev, process) => {
478 | 					let tasks = data[process]
479 | 					let columns = new Set(tasks.reduce((p, t) => p.concat(Object.keys(t)), []))
480 | 					prev[process] = Array.from(columns)
481 | 					return prev
482 | 				}, {})
483 | 
484 | 				$scope.querying = false
485 | 				$scope.pipeline_data = data
486 | 				$scope.process_names = process_names
487 | 				$scope.process_columns = process_columns
488 | 			}, function() {
489 | 				$scope.querying = false
490 | 				alert.error('Failed to query pipeline tasks.')
491 | 			})
492 | 	}
493 | 
494 | 	$scope.update_columns = function(process_columns, process, merge_process) {
495 | 		let array1 = process ? process_columns[process] : []
496 | 		let array2 = merge_process ? process_columns[merge_process] : []
497 | 
498 | 		$scope.columns = Array.from(new Set(array1.concat(array2)))
499 | 		$scope.merge_columns = array1.filter(value => array2.includes(value));
500 | 	}
501 | 
502 | 	$scope.visualize = function(pipeline, process, args) {
503 | 		$scope.visualizing = true
504 | 
505 | 		api.Task.visualize(pipeline, process, args)
506 | 			.then(function(image_data) {
507 | 				$scope.visualizing = false
508 | 				$scope.visualize_success = true
509 | 				$scope.image_data = image_data
510 | 				alert.success('Visualiation was created.')
511 | 			}, function() {
512 | 				$scope.visualizing = false
513 | 				$scope.visualize_success = false
514 | 				alert.error('Failed to visualize data.')
515 | 			})
516 | 	}
517 | 
518 | 	// initialize
519 | 	$scope.query_pipelines()
520 | }])
521 | 
522 | 
523 | 
524 | app.controller('ModelCtrl', ['$scope', 'alert', 'api', function($scope, alert, api) {
525 | 	$scope.args = {
526 | 		merge_process: null,
527 | 		inputs: [],
528 | 		target: null,
529 | 		scaler: 'maxabs',
530 | 		selectors: 'exit=0',
531 | 		hidden_layer_sizes: '128 128 128',
532 | 		epochs: 200
533 | 	}
534 | 	$scope.columns = []
535 | 	$scope.merge_columns = []
536 | 
537 | 	$scope.train = {}
538 | 	$scope.predict = {}
539 | 
540 | 	$scope.query_pipelines = function() {
541 | 		api.Task.query_pipelines()
542 | 			.then(function(pipelines) {
543 | 				$scope.pipelines = pipelines
544 | 			}, function() {
545 | 				alert.error('Failed to query pipelines.')
546 | 			})
547 | 	}
548 | 
549 | 	$scope.query_dataset = function(pipeline) {
550 | 		$scope.querying = true
551 | 
552 | 		api.Task.query_pipeline(pipeline)
553 | 			.then(function(data) {
554 | 				let process_names = Object.keys(data)
555 | 				let process_columns = process_names.reduce((prev, process) => {
556 | 					let tasks = data[process]
557 | 					let columns = new Set(tasks.reduce((p, t) => p.concat(Object.keys(t)), []))
558 | 					prev[process] = Array.from(columns)
559 | 					return prev
560 | 				}, {})
561 | 
562 | 				$scope.querying = false
563 | 				$scope.pipeline_data = data
564 | 				$scope.process_names = process_names
565 | 				$scope.process_columns = process_columns
566 | 			}, function() {
567 | 				$scope.querying = false
568 | 				alert.error('Failed to query pipeline tasks.')
569 | 			})
570 | 	}
571 | 
572 | 	$scope.update_columns = function(process_columns, process, merge_process) {
573 | 		let array1 = process ? process_columns[process] : []
574 | 		let array2 = merge_process ? process_columns[merge_process] : []
575 | 
576 | 		$scope.columns = Array.from(new Set(array1.concat(array2)))
577 | 		$scope.merge_columns = array1.filter(value => array2.includes(value));
578 | 	}
579 | 
580 | 	$scope.train = function(pipeline, process, args) {
581 | 		$scope.training = true
582 | 
583 | 		api.Model.train(pipeline, process, args)
584 | 			.then(function(results) {
585 | 				$scope.training = false
586 | 				$scope.train.results = results
587 | 				alert.success('Model was trained.')
588 | 			}, function() {
589 | 				$scope.training = false
590 | 				alert.error('Failed to train model.')
591 | 			})
592 | 	}
593 | 
594 | 	$scope.get_config = function(pipeline, process, target) {
595 | 		api.Model.get_config(pipeline, process, target)
596 | 			.then(function(config) {
597 | 				$scope.config = config
598 | 				$scope.predict.options = config.inputs
599 | 				$scope.predict.inputs = Object.keys(config.inputs).reduce((prev, input) => {
600 | 					prev[input] = null
601 | 					return prev
602 | 				}, {})
603 | 
604 | 				console.log($scope.predict)
605 | 			}, function() {
606 | 				alert.error('Failed to get model config.')
607 | 			})
608 | 	}
609 | 
610 | 	$scope.predict = function(pipeline, process, target, inputs) {
611 | 		$scope.predicting = true
612 | 
613 | 		api.Model.predict(pipeline, process, target, inputs)
614 | 			.then(function(results) {
615 | 				$scope.predicting = false
616 | 				$scope.predict.results = results
617 | 				alert.success('Performed model prediction.')
618 | 			}, function() {
619 | 				$scope.predicting = false
620 | 				alert.error('Failed to perform model prediction.')
621 | 			})
622 | 	}
623 | 
624 | 	// initialize
625 | 	$scope.query_pipelines()
626 | }])
627 | 


--------------------------------------------------------------------------------
/client/css/style.css:
--------------------------------------------------------------------------------
 1 | /* typography rules */
 2 | body {
 3 | 	font-family: monospace;
 4 | }
 5 | 
 6 | /* rules for button icons */
 7 | .button-icon {
 8 | 	color: #808080;
 9 | }
10 | 
11 | .button-icon:hover,
12 | .button-icon:focus {
13 | 	text-decoration: none;
14 | 	cursor: pointer;
15 | 	color: #303030;
16 | }
17 | 
18 | /* rules for alerts */
19 | #alerts {
20 | 	z-index: 2000;
21 | 	position: fixed;
22 | 	bottom: 0px;
23 | 	left: 20px;
24 | 	width: 300px;
25 | }
26 | 
27 | /* rules for pre text */
28 | pre {
29 | 	background-color: #f5f5f5;
30 | }
31 | 


--------------------------------------------------------------------------------
/client/favicon.ico:
--------------------------------------------------------------------------------
1 |           h     (                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                		
2 | 							 	
3 | 								
4 | 			 	 						
5 | 						                                                                


--------------------------------------------------------------------------------
/client/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en" ng-app="app" ng-strict-di>
 3 | <head>
 4 | 	<title>Nextflow API</title>
 5 | 	<meta charset="utf-8">
 6 | 	<meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 | 
 8 | 	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
 9 | 	<link rel="stylesheet" href="css/style.css">
10 | 
11 | 	<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.7.8/angular.min.js"></script>
12 | 	<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.7.8/angular-route.min.js"></script>
13 | 	<script src="https://nervgh.github.io/pages/angular-file-upload/dist/angular-file-upload.min.js"></script>
14 | 	<script src="app.js"></script>
15 | </head>
16 | <body ng-controller="MainCtrl">
17 | 	<nav class="navbar navbar-expand navbar-dark bg-dark mb-5">
18 | 		<a class="navbar-brand" href="#!/">Nextflow API</a>
19 | 
20 | 		<div class="collapse navbar-collapse justify-content-end">
21 | 			<div class="navbar-nav">
22 | 				<a class="nav-item nav-link" href="#!/workflows">Workflows</a>
23 | 				<a class="nav-item nav-link" href="#!/tasks">Tasks</a>
24 | 				<a class="nav-item nav-link" href="#!/visualizer">Visualizer</a>
25 | 				<a class="nav-item nav-link" href="#!/model">Model</a>
26 | 			</div>
27 | 		</div>
28 | 	</nav>
29 | 
30 | 	<div class="container-fluid">
31 | 		<ng-view></ng-view>
32 | 	</div>
33 | 
34 | 	<div id="alerts">
35 | 		<div ng-repeat="a in alert.alerts" class="alert alert-{{a.type}}">
36 | 			<button type="button" class="close" ng-click="alert.remove($index)">
37 | 				<span>&times;</span>
38 | 			</button>
39 | 			<div>
40 | 				<strong>{{a.header}}</strong>
41 | 				<span>{{a.message}}<span>
42 | 			</div>
43 | 		</div>
44 | 	</div>
45 | </body>
46 | </html>
47 | 


--------------------------------------------------------------------------------
/client/views/model.html:
--------------------------------------------------------------------------------
  1 | <div class="row justify-content-center">
  2 | 	<div class="col-sm-6">
  3 | 		<div class="card mb-4">
  4 | 			<h6 class="card-header">Query Dataset</h6>
  5 | 
  6 | 			<div class="card-body" ng-hide="pipelines">
  7 | 				<div class="text-center">
  8 | 					<span class="spinner-border spinner-border-sm"></span>
  9 | 					<span>Querying pipelines...</span>
 10 | 				</div>
 11 | 			</div>
 12 | 
 13 | 			<form class="card-body" name="form-query" ng-show="pipelines">
 14 | 				<div class="form-group row">
 15 | 					<label class="col-sm-3 col-form-label">Pipeline</label>
 16 | 					<div class="col-sm-9">
 17 | 						<select class="form-control" ng-model="pipeline" ng-options="p for p in pipelines"></select>
 18 | 					</div>
 19 | 				</div>
 20 | 
 21 | 				<div class="text-center">
 22 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="querying" ng-click="!querying && query_dataset(pipeline)">
 23 | 						<span ng-show="querying" class="spinner-border spinner-border-sm"></span>
 24 | 						<span ng-show="!querying">Query</span>
 25 | 					</button>
 26 | 				</div>
 27 | 			</form>
 28 | 		</div>
 29 | 
 30 | 		<div class="card mb-4" ng-show="process_names">
 31 | 			<h6 class="card-header">Train Model</h6>
 32 | 
 33 | 			<form class="card-body" name="form-train">
 34 | 				<div class="form-group row">
 35 | 					<label class="col-sm-3 col-form-label">Process</label>
 36 | 					<div class="col-sm-9">
 37 | 						<select class="form-control" ng-model="train.process" ng-options="p for p in process_names" ng-change="update_columns(process_columns, train.process, args.merge_process)"></select>
 38 | 					</div>
 39 | 				</div>
 40 | 
 41 | 				<div class="form-group row">
 42 | 					<label class="col-sm-3 col-form-label">Join Process</label>
 43 | 					<div class="col-sm-9">
 44 | 						<select class="form-control" ng-model="args.merge_process" ng-options="p for p in process_names" ng-change="update_columns(process_columns, train.process, args.merge_process)">
 45 | 							<option></option>
 46 | 						</select>
 47 | 					</div>
 48 | 				</div>
 49 | 
 50 | 				<div class="form-group row" ng-show="args.merge_process">
 51 | 					<label class="col-sm-3 col-form-label">Join On</label>
 52 | 					<div class="col-sm-9">
 53 | 						<select class="form-control" ng-model="args.merge_key" ng-options="p for p in merge_columns"></select>
 54 | 					</div>
 55 | 				</div>
 56 | 
 57 | 				<div class="form-group row" ng-show="train.process">
 58 | 					<label class="col-sm-3 col-form-label">Inputs</label>
 59 | 					<div class="col-sm-9">
 60 | 						<select class="form-control" ng-model="args.inputs" ng-options="p for p in columns" multiple size="10"></select>
 61 | 					</div>
 62 | 				</div>
 63 | 
 64 | 				<div class="form-group row" ng-show="train.process">
 65 | 					<label class="col-sm-3 col-form-label">Target</label>
 66 | 					<div class="col-sm-9">
 67 | 						<select class="form-control" ng-model="args.target" ng-options="p for p in columns"></select>
 68 | 					</div>
 69 | 				</div>
 70 | 
 71 | 				<div class="form-group row" ng-show="train.process">
 72 | 					<label class="col-sm-3 col-form-label">Selectors</label>
 73 | 					<div class="col-sm-9">
 74 | 						<input class="form-control" ng-model="args.selectors"/>
 75 | 					</div>
 76 | 				</div>
 77 | 
 78 | 				<div class="form-group row" ng-show="train.process">
 79 | 					<label class="col-sm-3 col-form-label">Scaler</label>
 80 | 					<div class="col-sm-9">
 81 | 						<select class="form-control" ng-model="args.scaler" ng-options="v for v in ['maxabs', 'minmax', 'standard']">
 82 | 							<option></option>
 83 | 						</select>
 84 | 					</div>
 85 | 				</div>
 86 | 
 87 | 				<div class="form-group row" ng-show="train.process">
 88 | 					<label class="col-sm-3 col-form-label">Hidden Layers</label>
 89 | 					<div class="col-sm-9">
 90 | 						<input class="form-control" ng-model="args.hidden_layer_sizes" ng-pattern="/[\d\s]*/"/>
 91 | 					</div>
 92 | 				</div>
 93 | 
 94 | 				<div class="form-group row" ng-show="train.process">
 95 | 					<label class="col-sm-3 col-form-label">Epochs</label>
 96 | 					<div class="col-sm-9">
 97 | 						<input class="form-control" type="number" ng-model="args.epochs"/>
 98 | 					</div>
 99 | 				</div>
100 | 
101 | 				<div class="text-center">
102 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="training" ng-click="!training && train(pipeline, train.process, args)">
103 | 						<span ng-show="training" class="spinner-border spinner-border-sm"></span>
104 | 						<span ng-show="!training">Train</span>
105 | 					</button>
106 | 				</div>
107 | 			</form>
108 | 		</div>
109 | 
110 | 		<div class="card mb-4" ng-show="train.results">
111 | 			<h6 class="card-header">Training Results</h6>
112 | 
113 | 			<div class="card-body">
114 | 				<div class="form-group row">
115 | 					<label class="col-sm-3 col-form-label">Prediction Error</label>
116 | 					<div class="col-sm-9">
117 | 						<p class="form-control-plaintext">{{train.results.mpe | number:3}} %</p>
118 | 					</div>
119 | 				</div>
120 | 
121 | 				<div class="form-group row">
122 | 					<label class="col-sm-3 col-form-label">Coverage</label>
123 | 					<div class="col-sm-9">
124 | 						<p class="form-control-plaintext">{{train.results.cov | number:3}} %</p>
125 | 					</div>
126 | 				</div>
127 | 			</div>
128 | 
129 | 			<div class="card-body text-center">
130 | 				<img ng-src="data:image/png;base64,{{train.results.scatterplot}}"/>
131 | 			</div>
132 | 		</div>
133 | 
134 | 		<div class="card mb-4" ng-show="process_names">
135 | 			<h6 class="card-header">Predict</h6>
136 | 
137 | 			<form class="card-body" name="form-get-config">
138 | 				<div class="form-group row">
139 | 					<label class="col-sm-3 col-form-label">Process</label>
140 | 					<div class="col-sm-9">
141 | 						<select class="form-control" ng-model="predict.process" ng-options="p for p in process_names"></select>
142 | 					</div>
143 | 				</div>
144 | 
145 | 				<div class="form-group row" ng-show="predict.process">
146 | 					<label class="col-sm-3 col-form-label">Target</label>
147 | 					<div class="col-sm-9">
148 | 						<select class="form-control" ng-model="predict.target" ng-options="p for p in columns"></select>
149 | 					</div>
150 | 				</div>
151 | 
152 | 				<div class="text-center">
153 | 					<button type="button" class="btn btn-outline-dark" ng-click="get_config(pipeline, predict.process, predict.target)">
154 | 						<span>Get Config</span>
155 | 					</button>
156 | 				</div>
157 | 			</form>
158 | 
159 | 			<form class="card-body" name="form-predict">
160 | 				<div class="form-group row" ng-repeat="(key, options) in predict.options">
161 | 					<label class="col-sm-3 col-form-label">{{key}}</label>
162 | 					<div class="col-sm-9" ng-show="options">
163 | 						<select class="form-control" ng-model="predict.inputs[key]" ng-options="v for v in options"></select>
164 | 					</div>
165 | 					<div class="col-sm-9" ng-hide="options">
166 | 						<input class="form-control" ng-model="predict.inputs[key]"/>
167 | 					</div>
168 | 				</div>
169 | 
170 | 				<div class="text-center">
171 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="predicting" ng-click="!predicting && predict(pipeline, predict.process, predict.target, predict.inputs)">
172 | 						<span ng-show="predicting" class="spinner-border spinner-border-sm"></span>
173 | 						<span ng-show="!predicting">Predict</span>
174 | 					</button>
175 | 				</div>
176 | 			</form>
177 | 		</div>
178 | 
179 | 		<div class="card mb-4" ng-show="predict.results">
180 | 			<h6 class="card-header">Prediction Results</h6>
181 | 
182 | 			<div class="card-body">
183 | 				<pre class="pre-scrollable border rounded p-2">{{predict.results | json}}</pre>
184 | 			</div>
185 | 		</div>
186 | 	</div>
187 | </div>
188 | 


--------------------------------------------------------------------------------
/client/views/task.html:
--------------------------------------------------------------------------------
 1 | <div class="row justify-content-center">
 2 | 	<div class="col-sm-8">
 3 | 		<div class="card mb-4">
 4 | 			<h6 class="card-header">Task</h6>
 5 | 
 6 | 			<div class="card-body">
 7 | 				<pre class="border rounded p-2">{{task | json}}</pre>
 8 | 			</div>
 9 | 		</div>
10 | 
11 | 		<div class="card mb-4" ng-show="(task.event === 'process_started') || (task.event === 'process_completed')">
12 | 			<h6 class="card-header">Logs</h6>
13 | 
14 | 			<div class="card-body">
15 | 				<div class="text-center">
16 | 					<button type="button" class="btn btn-outline-dark" ng-click="fetchLog()">Fetch Logs</button>
17 | 				</div>
18 | 
19 | 				<hr>
20 | 
21 | 				<p>Output Log:</p>
22 | 				<pre class="border rounded p-2">{{task_out}}</pre>
23 | 
24 | 				<hr>
25 | 
26 | 				<p>Error Log:</p>
27 | 				<pre class="border rounded p-2">{{task_err}}</pre>
28 | 			</div>
29 | 		</div>
30 | 	</div>
31 | </div>
32 | 


--------------------------------------------------------------------------------
/client/views/tasks.html:
--------------------------------------------------------------------------------
 1 | <div class="row">
 2 | 	<div class="col">
 3 | 		<div class="card mb-4">
 4 | 			<h6 class="card-header">Tasks</h6>
 5 | 
 6 | 			<form class="card-body" name="form">
 7 | 				<div class="form-group row">
 8 | 					<div class="col-sm-2">
 9 | 						<button type="button" class="btn btn-light" ng-disabled="page == 0" ng-click="page > 0 && query_tasks(page - 1)">newer</button>
10 | 						<button type="button" class="btn btn-light" ng-click="query_tasks(page + 1)">older</button>
11 | 					</div>
12 | 					<div class="col-sm-5" ng-show="pipelines">
13 | 						<select class="form-control" ng-model="pipeline" ng-options="p for p in pipelines" required></select>
14 | 					</div>
15 | 					<div class="col-sm-3" ng-show="pipelines">
16 | 						<button type="button" class="btn btn-block btn-outline-dark" ng-disabled="archiving" ng-click="!archiving && archive(pipeline)">
17 | 							<span ng-show="archiving" class="spinner-border spinner-border-sm"></span>
18 | 							<span ng-show="!archiving">Create Archive</span>
19 | 						</button>
20 | 					</div>
21 | 					<div class="col-sm-2" ng-show="archive_success">
22 | 						<a class="btn btn-block btn-outline-dark" ng-href="api/tasks/archive/{{pipeline}}/download" download>Download</a>
23 | 					</div>
24 | 				</div>
25 | 			</form>
26 | 
27 | 			<table class="table mb-0">
28 | 				<th scope="col">ID</th>
29 | 				<th scope="col">Run Name</th>
30 | 				<th scope="col">Time</th>
31 | 				<th scope="col">Event</th>
32 | 				<th scope="col"></th>
33 | 
34 | 				<tr ng-repeat="t in tasks">
35 | 					<td>
36 | 						<a ng-href="#!/tasks/{{t._id}}">{{t._id.slice(0, 8)}}</a>
37 | 					</td>
38 | 					<td>{{t.runName}}</td>
39 | 					<td>{{t.utcTime | date:'short'}}</td>
40 | 					<td>{{t.event}}</td>
41 | 				</tr>
42 | 			</table>
43 | 		</div>
44 | 	</div>
45 | </div>
46 | 


--------------------------------------------------------------------------------
/client/views/visualizer.html:
--------------------------------------------------------------------------------
  1 | <div class="row justify-content-center">
  2 | 	<div class="col-sm-6">
  3 | 		<div class="card mb-4">
  4 | 			<h6 class="card-header">Query Dataset</h6>
  5 | 
  6 | 			<div class="card-body" ng-hide="pipelines">
  7 | 				<div class="text-center">
  8 | 					<span class="spinner-border spinner-border-sm"></span>
  9 | 					<span>Querying pipelines...</span>
 10 | 				</div>
 11 | 			</div>
 12 | 
 13 | 			<form class="card-body" name="form-query" ng-show="pipelines">
 14 | 				<div class="form-group row">
 15 | 					<label class="col-sm-3 col-form-label">Pipeline</label>
 16 | 					<div class="col-sm-9">
 17 | 						<select class="form-control" ng-model="pipeline" ng-options="p for p in pipelines"></select>
 18 | 					</div>
 19 | 				</div>
 20 | 
 21 | 				<div class="text-center">
 22 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="querying" ng-click="!querying && query_dataset(pipeline)">
 23 | 						<span ng-show="querying" class="spinner-border spinner-border-sm"></span>
 24 | 						<span ng-show="!querying">Query</span>
 25 | 					</button>
 26 | 				</div>
 27 | 			</form>
 28 | 		</div>
 29 | 
 30 | 		<div class="card mb-4" ng-show="process_names">
 31 | 			<h6 class="card-header">Visualize</h6>
 32 | 
 33 | 			<form class="card-body" name="form-visualize">
 34 | 				<div class="form-group row">
 35 | 					<label class="col-sm-3 col-form-label">Process</label>
 36 | 					<div class="col-sm-9">
 37 | 						<select class="form-control" ng-model="process" ng-options="p for p in process_names" ng-change="update_columns(process_columns, process, args.merge_process)"></select>
 38 | 					</div>
 39 | 				</div>
 40 | 
 41 | 				<div class="form-group row">
 42 | 					<label class="col-sm-3 col-form-label">Join Process</label>
 43 | 					<div class="col-sm-9">
 44 | 						<select class="form-control" ng-model="args.merge_process" ng-options="p for p in process_names" ng-change="update_columns(process_columns, process, args.merge_process)">
 45 | 							<option></option>
 46 | 						</select>
 47 | 					</div>
 48 | 				</div>
 49 | 
 50 | 				<div class="form-group row" ng-show="args.merge_process">
 51 | 					<label class="col-sm-3 col-form-label">Join On</label>
 52 | 					<div class="col-sm-9">
 53 | 						<select class="form-control" ng-model="args.merge_key" ng-options="p for p in merge_columns"></select>
 54 | 					</div>
 55 | 				</div>
 56 | 
 57 | 				<div class="form-group row" ng-show="process">
 58 | 					<label class="col-sm-3 col-form-label">Plot Type</label>
 59 | 					<div class="col-sm-9">
 60 | 						<select class="form-control" ng-model="args.plot_type">
 61 | 							<option></option>
 62 | 							<option value="hist">Histogram</option>
 63 | 							<option value="count">Count Plot</option>
 64 | 							<option value="scatter">Scatter Plot</option>
 65 | 							<option value="ct">Contingency Table</option>
 66 | 							<option value="bar">Bar Plot</option>
 67 | 							<option value="point">Point Plot</option>
 68 | 						</select>
 69 | 					</div>
 70 | 				</div>
 71 | 
 72 | 				<div class="form-group row" ng-show="process">
 73 | 					<label class="col-sm-3 col-form-label">X Axis</label>
 74 | 					<div class="col-sm-9">
 75 | 						<select class="form-control" ng-model="args.xaxis" ng-options="p for p in columns" required></select>
 76 | 					</div>
 77 | 				</div>
 78 | 
 79 | 				<div class="form-group row" ng-show="process">
 80 | 					<label class="col-sm-3 col-form-label">Y Axis</label>
 81 | 					<div class="col-sm-9">
 82 | 						<select class="form-control" ng-model="args.yaxis" ng-options="p for p in columns">
 83 | 							<option></option>
 84 | 						</select>
 85 | 					</div>
 86 | 				</div>
 87 | 
 88 | 				<div class="form-group row" ng-show="process">
 89 | 					<label class="col-sm-3 col-form-label">Row Axis</label>
 90 | 					<div class="col-sm-9">
 91 | 						<select class="form-control" ng-model="args.row" ng-options="p for p in columns">
 92 | 							<option></option>
 93 | 						</select>
 94 | 					</div>
 95 | 				</div>
 96 | 
 97 | 				<div class="form-group row" ng-show="process">
 98 | 					<label class="col-sm-3 col-form-label">Col Axis</label>
 99 | 					<div class="col-sm-9">
100 | 						<select class="form-control" ng-model="args.col" ng-options="p for p in columns">
101 | 							<option></option>
102 | 						</select>
103 | 					</div>
104 | 				</div>
105 | 
106 | 				<div class="form-group row" ng-show="process">
107 | 					<label class="col-sm-3 col-form-label">Hue Axis</label>
108 | 					<div class="col-sm-9">
109 | 						<select class="form-control" ng-model="args.hue" ng-options="p for p in columns">
110 | 							<option></option>
111 | 						</select>
112 | 					</div>
113 | 				</div>
114 | 
115 | 				<div class="form-group row" ng-show="process">
116 | 					<label class="col-sm-3 col-form-label">Selectors</label>
117 | 					<div class="col-sm-9">
118 | 						<input class="form-control" ng-model="args.selectors"/>
119 | 					</div>
120 | 				</div>
121 | 
122 | 				<div class="form-group row" ng-show="process">
123 | 					<label class="col-sm-3 col-form-label">Height</label>
124 | 					<div class="col-sm-9">
125 | 						<input class="form-control" type="number" ng-model="args.height"/>
126 | 					</div>
127 | 				</div>
128 | 
129 | 				<div class="form-group row" ng-show="process">
130 | 					<label class="col-sm-3 col-form-label">Aspect Ratio</label>
131 | 					<div class="col-sm-9">
132 | 						<input class="form-control" type="number" ng-model="args.aspect"/>
133 | 					</div>
134 | 				</div>
135 | 
136 | 				<div class="text-center">
137 | 				<button type="button" class="btn btn-outline-dark" ng-disabled="visualizing" ng-click="!visualizing && visualize(pipeline, process, args)">
138 | 						<span ng-show="visualizing" class="spinner-border spinner-border-sm"></span>
139 | 						<span ng-show="!visualizing">Visualize</span>
140 | 					</button>
141 | 				</div>
142 | 			</form>
143 | 
144 | 			<div class="card-body text-center" ng-show="visualize_success">
145 | 				<img ng-src="data:image/png;base64,{{image_data}}"/>
146 | 			</div>
147 | 		</div>
148 | 	</div>
149 | </div>
150 | 


--------------------------------------------------------------------------------
/client/views/workflow.html:
--------------------------------------------------------------------------------
  1 | <div class="row justify-content-center">
  2 | 	<div class="col-sm-6">
  3 | 		<div class="card mb-4">
  4 | 			<h6 class="card-header">Workflow</h6>
  5 | 
  6 | 			<form class="card-body" name="form">
  7 | 				<div class="form-group row" ng-show="workflow._id != '0'">
  8 | 					<label class="col-sm-3 col-form-label">ID</label>
  9 | 					<div class="col-sm-9">
 10 | 						<p class="form-control-plaintext">{{workflow._id}}</p>
 11 | 					</div>
 12 | 				</div>
 13 | 
 14 | 				<div class="form-group row" ng-show="workflow._id != '0'">
 15 | 					<label class="col-sm-3 col-form-label">Date Created</label>
 16 | 					<div class="col-sm-9">
 17 | 						<p class="form-control-plaintext">{{workflow.date_created | date:'short'}}</p>
 18 | 					</div>
 19 | 				</div>
 20 | 
 21 | 				<div class="form-group row" ng-show="workflow._id != '0'">
 22 | 					<label class="col-sm-3 col-form-label">Date Submitted</label>
 23 | 					<div class="col-sm-9">
 24 | 						<p class="form-control-plaintext">{{workflow.date_submitted | date:'short'}}</p>
 25 | 					</div>
 26 | 				</div>
 27 | 
 28 | 				<div class="form-group row">
 29 | 					<label class="col-sm-3 col-form-label">Name</label>
 30 | 					<div class="col-sm-9">
 31 | 						<input
 32 | 							class="form-control"
 33 | 							name="name"
 34 | 							ng-model="workflow.name"
 35 | 						/>
 36 | 					</div>
 37 | 				</div>
 38 | 
 39 | 				<div class="form-group row">
 40 | 					<label class="col-sm-3 col-form-label">Pipeline</label>
 41 | 					<div class="col-sm-9">
 42 | 						<input
 43 | 							class="form-control"
 44 | 							ng-class="{'is-invalid': form.pipeline.$touched && form.pipeline.$invalid}"
 45 | 							name="pipeline"
 46 | 							ng-model="workflow.pipeline"
 47 | 							required
 48 | 						/>
 49 | 					</div>
 50 | 				</div>
 51 | 
 52 | 				<div class="form-group row">
 53 | 					<label class="col-sm-3 col-form-label">Profiles</label>
 54 | 					<div class="col-sm-9">
 55 | 						<input
 56 | 							class="form-control"
 57 | 							ng-class="{'is-invalid': form.profiles.$touched && form.profiles.$invalid}"
 58 | 							name="profiles"
 59 | 							ng-model="workflow.profiles"
 60 | 							required
 61 | 						/>
 62 | 					</div>
 63 | 				</div>
 64 | 
 65 | 				<div class="form-group row">
 66 | 					<label class="col-sm-3 col-form-label">Revision</label>
 67 | 					<div class="col-sm-9">
 68 | 						<input
 69 | 							class="form-control"
 70 | 							ng-class="{'is-invalid': form.revision.$touched && form.revision.$invalid}"
 71 | 							name="revision"
 72 | 							ng-model="workflow.revision"
 73 | 							required
 74 | 						/>
 75 | 					</div>
 76 | 				</div>
 77 | 
 78 | 				<div class="form-group row">
 79 | 					<label class="col-sm-3 col-form-label">Params Format</label>
 80 | 					<div class="col-sm-9">
 81 | 						<select
 82 | 							class="form-control"
 83 | 							name="params_format"
 84 | 							ng-model="workflow.params_format">
 85 | 							<option value="json">JSON</option>
 86 | 							<option value="yaml">YAML</option>
 87 | 						</select>
 88 | 					</div>
 89 | 				</div>
 90 | 
 91 | 				<div class="form-group row">
 92 | 					<label class="col-sm-3 col-form-label">Params</label>
 93 | 					<div class="col-sm-9">
 94 | 						<textarea
 95 | 							class="form-control"
 96 | 							rows="8"
 97 | 							name="params_data"
 98 | 							ng-model="workflow.params_data">
 99 | 						</textarea>
100 | 					</div>
101 | 				</div>
102 | 
103 | 				<div class="form-group row">
104 | 					<label class="col-sm-3 col-form-label">Input Directory</label>
105 | 					<div class="col-sm-9">
106 | 						<input
107 | 							class="form-control"
108 | 							ng-class="{'is-invalid': form.input_dir.$touched && form.input_dir.$invalid}"
109 | 							name="input_dir"
110 | 							ng-model="workflow.input_dir"
111 | 							required
112 | 						/>
113 | 					</div>
114 | 				</div>
115 | 
116 | 				<div class="form-group row">
117 | 					<label class="col-sm-3 col-form-label">Output Directory</label>
118 | 					<div class="col-sm-9">
119 | 						<input
120 | 							class="form-control"
121 | 							ng-class="{'is-invalid': form.output_dir.$touched && form.output_dir.$invalid}"
122 | 							name="output_dir"
123 | 							ng-model="workflow.output_dir"
124 | 							required
125 | 						/>
126 | 					</div>
127 | 				</div>
128 | 
129 | 				<div class="form-group row" ng-show="workflow._id != '0'">
130 | 					<label class="col-sm-3 col-form-label">Input Files</label>
131 | 					<div class="col-sm-9">
132 | 						<p class="form-control-plaintext" ng-show="workflow.input_files.length == 0">none</p>
133 | 						<p class="form-control-plaintext" ng-repeat="f in workflow.input_files">
134 | 							<a ng-href="api/workflows/{{workflow._id}}/download?path={{f}}" download>{{f}}</a>
135 | 						</p>
136 | 					</div>
137 | 				</div>
138 | 
139 | 				<div class="form-group row" ng-show="workflow._id != '0'">
140 | 					<label class="col-sm-3 col-form-label">Output Files</label>
141 | 					<div class="col-sm-9">
142 | 						<p class="form-control-plaintext" ng-show="workflow.output_files.length == 0">none</p>
143 | 						<p class="form-control-plaintext" ng-repeat="f in workflow.output_files">
144 | 							<a ng-href="api/workflows/{{workflow._id}}/download?path={{f}}" download>{{f}}</a>
145 | 						</p>
146 | 					</div>
147 | 				</div>
148 | 
149 | 				<div class="form-group row" ng-show="workflow.output_data">
150 | 					<label class="col-sm-3 col-form-label">Output Data</label>
151 | 					<div class="col-sm-9">
152 | 						<p class="form-control-plaintext">
153 | 							<a ng-href="api/workflows/{{workflow._id}}/download" download>Download</a>
154 | 						</p>
155 | 					</div>
156 | 				</div>
157 | 
158 | 				<hr>
159 | 
160 | 				<div class="text-center">
161 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="form.$invalid" ng-click="save(workflow)">Save</button>
162 | 					<a href="/" class="btn btn-link">Cancel</a>
163 | 				</div>
164 | 			</form>
165 | 		</div>
166 | 
167 | 		<div class="card mb-4" ng-show="workflow._id != '0'">
168 | 			<h6 class="card-header">Input Data</h6>
169 | 
170 | 			<form class="card-body form-horizontal" name="input_files">
171 | 				<div class="form-group row">
172 | 					<label class="col-sm-3 col-form-label">Files</label>
173 | 					<div class="col-sm-9">
174 | 						<p class="form-control-plaintext" ng-show="uploader.queue.length == 0">none</p>
175 | 						<p class="form-control-plaintext" ng-repeat="item in uploader.queue">{{item.file.name}}</p>
176 | 
177 | 						<input type="file" nv-file-select="" uploader="uploader" multiple/>
178 | 					</div>
179 | 				</div>
180 | 
181 | 				<div class="text-center">
182 | 					<button type="button" class="btn btn-outline-dark" ng-click="upload()">
183 | 						<span ng-show="uploading" class="spinner-border spinner-border-sm"></span>
184 | 						<span ng-show="!uploading">Upload</span>
185 | 					</button>
186 | 				</div>
187 | 			</form>
188 | 		</div>
189 | 
190 | 		<div class="card mb-4" ng-show="workflow._id != '0'">
191 | 			<h6 class="card-header">Execution</h6>
192 | 
193 | 			<form class="card-body form-horizontal" name="execution">
194 | 				<div class="form-group row" ng-show="workflow.status">
195 | 					<label class="col-sm-3 col-form-label">Status</label>
196 | 					<div class="col-sm-9">
197 | 						<p class="form-control-plaintext text-{{STATUS_COLORS[workflow.status]}}">
198 | 							<strong>{{workflow.status}}</strong>
199 | 						</p>
200 | 					</div>
201 | 				</div>
202 | 
203 | 				<div class="form-group row" ng-show="workflow.status">
204 | 					<label class="col-sm-3 col-form-label">Attempts</label>
205 | 					<div class="col-sm-9">
206 | 						<p class="form-control-plaintext">
207 | 							<strong>{{workflow.attempts}}</strong>
208 | 						</p>
209 | 					</div>
210 | 				</div>
211 | 
212 | 				<div class="text-center">
213 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="workflow.status == 'running'" ng-click="workflow.status != 'running' && launch(workflow._id)">
214 | 						<span ng-show="launching" class="spinner-border spinner-border-sm"></span>
215 | 						<span ng-show="!launching">Launch</span>
216 | 					</button>
217 | 
218 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="workflow.status == 'running'" ng-click="workflow.status != 'running' && resume(workflow._id)">
219 | 						<span ng-show="resuming" class="spinner-border spinner-border-sm"></span>
220 | 						<span ng-show="!resuming">Resume</span>
221 | 					</button>
222 | 
223 | 					<button type="button" class="btn btn-outline-dark" ng-disabled="workflow.status != 'running'" ng-click="workflow.status == 'running' && cancel(workflow._id)">
224 | 						<span ng-show="cancelling" class="spinner-border spinner-border-sm"></span>
225 | 						<span ng-show="!cancelling">Cancel</span>
226 | 					</button>
227 | 				</div>
228 | 
229 | 				<hr>
230 | 
231 | 				<pre class="border rounded p-2">{{workflow.log}}</pre>
232 | 			</form>
233 | 		</div>
234 | 	</div>
235 | </div>
236 | 


--------------------------------------------------------------------------------
/client/views/workflows.html:
--------------------------------------------------------------------------------
 1 | <div class="row">
 2 | 	<div class="col">
 3 | 		<div class="card mb-4">
 4 | 			<h6 class="card-header">Workflows</h6>
 5 | 
 6 | 			<div class="card-body">
 7 | 				<a href="#!/workflows/0">Create workflow</a>
 8 | 			</div>
 9 | 
10 | 			<div class="card-body">
11 | 				<div class="button-group">
12 | 					<button class="btn btn-light" ng-disabled="page == 0" ng-click="page > 0 && query(page - 1)">newer</button>
13 | 					<button class="btn btn-light" ng-click="query(page + 1)">older</button>
14 | 				</div>
15 | 			</div>
16 | 
17 | 			<table class="table mb-0">
18 | 				<th scope="col">ID</th>
19 | 				<th scope="col">Date Created</th>
20 | 				<th scope="col">Name</th>
21 | 				<th scope="col">Pipeline</th>
22 | 				<th scope="col">Status</th>
23 | 				<th scope="col"></th>
24 | 
25 | 				<tr ng-repeat="w in workflows">
26 | 					<td>
27 | 						<a ng-href="#!/workflows/{{w._id}}">{{w._id.slice(0, 8)}}</a>
28 | 					</td>
29 | 					<td>{{w.date_created | date:'short'}}</td>
30 | 					<td>{{w.name}}</td>
31 | 					<td>{{w.pipeline}}</td>
32 | 					<td>
33 | 						<strong class="text-{{STATUS_COLORS[w.status]}}">{{w.status}}</strong>
34 | 					</td>
35 | 					<td class="text-center">
36 | 						<span class="button-icon" ng-click="delete(w)">&times;</span>
37 | 					</td>
38 | 				</tr>
39 | 			</table>
40 | 		</div>
41 | 	</div>
42 | </div>
43 | 


--------------------------------------------------------------------------------
/helm/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | appVersion: "1.0"
3 | description: A Helm chart for the nextflow-api
4 | name: nextflow-api
5 | version: 0.1.2
6 | 


--------------------------------------------------------------------------------
/helm/README.md:
--------------------------------------------------------------------------------
  1 | # Deploy Nextflow-API to Kubernetes Using Helm
  2 | 
  3 | This guide assumes you have access to a K8s cluster, and either a valid PVC or storage class on that cluster.
  4 | 
  5 | ## Install Helm
  6 | 
  7 | Follow the [installation instructions](https://helm.sh/docs/intro/install) from the Helm documentation to install Helm. The Helm chart for Nextflow-API is confirmed to work on [Helm v3.0.0-beta3](https://github.com/helm/helm/releases/tag/v3.0.0-beta.3), but it is failing on many newer versions of Helm, so if you have issues deploying Nextflow-API then try using that exact version.
  8 | 
  9 | Helm 3 is used because it does not require installing anything on the K8s cluster, while Helm 2 requires the user to install Tiller. This chart should work with Helm 2 if needed.
 10 | 
 11 | ## Configure Nextflow-API Helm Chart
 12 | 
 13 | The file `values.yaml` contains all of the configurable values for the chart.
 14 | 
 15 | Edit the following sections:
 16 | 
 17 | #### PVC
 18 | ```
 19 | # PVC
 20 | NewLocalPVC:
 21 |   # If true, create new PVC on local cluster.
 22 |   # (temp, future PVCs will be dynamically configurable)
 23 |   Enabled: true
 24 |   Name: nextflow-api-local
 25 |   StorageClass: nfs
 26 |   Size: 20Gi
 27 | 
 28 | ExistingLocalPVC:
 29 |   # If true, use existing PVC on local cluster.
 30 |   # (temp, future PVCs will be dynamically configurable)
 31 |   Enabled: false
 32 |   Name: deepgtex-prp
 33 | ```
 34 | 
 35 | If you want to create a new PVC:
 36 | 
 37 | 1. Set `NewLocalPVC` to `true` and `ExistingLocalPVC` to `false`
 38 | 2. Change the `Name` to the PVC you have set up on your K8s cluster.
 39 | 3. Change the `StorageClass` and `Size` to whatever storage class and size you want to use.
 40 | 
 41 | If you want to use an existing PVC:
 42 | 
 43 | 1. Set `NewLocalPVC` to `false` and `ExistingLocalPVC` to `true`
 44 | 2. Change the `Name` to the PVC you have set up on your K8s cluster.
 45 | 
 46 | __TODO__: Remote cluster configuration (disregard and leave `false` for now)
 47 | 
 48 | #### Database and Web Server Deployments
 49 | ```
 50 | # Database deployment settings
 51 | Database:
 52 |   # Resource requests and limits per container
 53 |   Resources:
 54 |     Requests:
 55 |       CPU: 4
 56 |       Memory: 8Gi
 57 |     Limits:
 58 |       CPU: 8
 59 |       Memory: 16Gi
 60 | 
 61 | # Web server deployment settings
 62 | WebServer:
 63 |   # Number of containers
 64 |   Replicas: 1
 65 |   # Resource requests and limits per container
 66 |   Resources:
 67 |     Requests:
 68 |       CPU: 1
 69 |       Memory: 4Gi
 70 |     Limits:
 71 |       CPU: 1
 72 |       Memory: 4Gi
 73 | ```
 74 | 
 75 | Nextflow-API contains a database deployment and a web server deployment, which can optionally include multiple replicas. Note that you must use a `LoadBalancer` in order to have multiple web server replicas.
 76 | 
 77 | #### Ingress / LoadBalancer
 78 | ```
 79 | # Ingress control settings
 80 | Ingress:
 81 |   # If true, use ingress control.
 82 |   # Otherwise, generic LoadBalancer networking will be used,
 83 |   # and the other settings in this section will be ignored.
 84 |   Enabled: false
 85 |   # The subdomain to associate with this service.
 86 |   Host: nextflow-api.nautilus.optiputer.net
 87 |   Class: traefik
 88 | ```
 89 | 
 90 | Nextflow-API will either use an `Ingress` or a `LoadBalancer` to expose itself to the public Internet.
 91 | 
 92 | To use an `Ingress`:
 93 | 
 94 | 1. Set `Enabled` to `true`
 95 | 2. Change the `Host` to `nextflow-api.<domain>`. (ex. `nextflow-api.scigateway.net`)
 96 | 3. Change the `Class` if needed.
 97 | 
 98 | To use a `LoadBalancer`, simply set `Enabled` to `false`
 99 | 
100 | Now the Helm Chart is configured and ready to deploy!
101 | 
102 | ## Deploy Nextflow-API
103 | 
104 | Navigate to `nextflow-api/helm`
105 | 
106 | Deploy using `helm install nextflow-api .`
107 | 
108 | ## Use Nextflow-API
109 | 
110 | #### Give Nextflow the necessary permissions to deploy jobs to your K8s cluster.
111 | ````
112 | kubectl create rolebinding default-edit --clusterrole=edit --serviceaccount=default:default
113 | kubectl create rolebinding default-view --clusterrole=view --serviceaccount=default:default
114 | ````
115 | 
116 | These commands give the default service account the ability to view and edit cluster resources. Nextflow driver pods use this account to deploy process pods. This creates rolebindings in the `default` namespace. If you are not in the default namespace, use `KUBE_EDITOR="nano" kubectl edit rolebinding <role-binding>`. Edit the `namespace` to the one you are using, then save.
117 | 
118 | #### Ingress
119 | 
120 | If you are using an `Ingress`, simply navigate in your web browser to the `Host` that you specified.
121 | 
122 | #### LoadBalancer
123 | 
124 | If you are using a `LoadBalancer`:
125 | 
126 | 1. Run `kubectl get service` to list the services that are running in your cluster.
127 | 2. Find the service named `nextflow-api` and record the `EXTERNAL-IP`.
128 | 3. Navigate in your web browser to `<EXTERNAL-IP>:8080`
129 | 
130 | All done! Now you can use Nextflow-API to submit and monitor workflows.
131 | 
132 | ## Delete Deployment
133 | 
134 | To delete the deployment, run `helm uninstall nextflow-api`.
135 | 


--------------------------------------------------------------------------------
/helm/gen-secret.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | export KUBE_CONFIG=$(cat ~/.kube/config | base64 | tr -d '\n')
 4 | 
 5 | cat > templates/secret.yaml <<EOF
 6 | apiVersion: v1
 7 | kind: Secret
 8 | metadata:
 9 |   name: kubeconfig
10 | type: Opaque
11 | data:
12 |   config: ${KUBE_CONFIG}
13 | EOF
14 | 
15 | cat templates/secret.yaml
16 | 


--------------------------------------------------------------------------------
/helm/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "nextflow-api.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "nextflow-api.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "nextflow-api.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}


--------------------------------------------------------------------------------
/helm/templates/nextflow-api.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: apps/v1
  2 | kind: Deployment
  3 | metadata:
  4 |   name: nextflow-api
  5 |   labels:
  6 |     app: nextflow-api
  7 |     chart: {{ template "nextflow-api.chart" . }}
  8 |     release: {{ .Release.Name }}
  9 | spec:
 10 |   replicas: {{ .Values.WebServer.Replicas }}
 11 |   selector:
 12 |     matchLabels:
 13 |       app: nextflow-api
 14 |   template:
 15 |     metadata:
 16 |       labels:
 17 |         app: nextflow-api
 18 |         chart: {{ template "nextflow-api.chart" . }}
 19 |         release: {{ .Release.Name }}
 20 |     spec:
 21 |       serviceAccountName: default
 22 |       containers:
 23 |       - name: nextflow-api
 24 |         image: {{ .Values.WebServer.Image }}
 25 |         imagePullPolicy: Always
 26 |         env:
 27 |         - name: NXF_EXECUTOR
 28 |           value: "k8s"
 29 |         {{ if .Values.Remote.Enabled }}
 30 |         - name: PVC_NAME
 31 |           value: {{ .Values.Remote.PVC }}
 32 |         {{ else if .Values.NewLocalPVC.Enabled }}
 33 |         - name: PVC_NAME
 34 |           value: {{ .Values.NewLocalPVC.Name }}
 35 |         {{ else if .Values.ExistingLocalPVC.Enabled }}
 36 |         - name: PVC_NAME
 37 |           value: {{ .Values.ExistingLocalPVC.Name }}
 38 |         {{ end }}
 39 |         command:
 40 |         - /bin/bash
 41 |         - -c
 42 |         {{ if eq .Values.Remote.Enabled true }}
 43 |         - scripts/startup-nautilus.sh {{ .Values.WebServer.Backend }} {{ .Values.Remote.Context }}
 44 |         {{ else }}
 45 |         - scripts/startup-nautilus.sh {{ .Values.WebServer.Backend }}
 46 |         {{ end }}
 47 |         resources:
 48 |           requests:
 49 |             cpu: {{ .Values.WebServer.Resources.Requests.CPU }}
 50 |             memory: {{ .Values.WebServer.Resources.Requests.Memory }}
 51 |           limits:
 52 |             cpu: {{ .Values.WebServer.Resources.Limits.CPU }}
 53 |             memory: {{ .Values.WebServer.Resources.Limits.Memory }}
 54 |         volumeMounts:
 55 |         - name: vol-1
 56 |           mountPath: /workspace
 57 |         {{ if .Values.Remote.Enabled }}
 58 |         - name: conf
 59 |           mountPath: "/etc/.kube"
 60 |         {{ end }}
 61 |         ports:
 62 |         - containerPort: 8080
 63 |         lifecycle:
 64 |           preStop:
 65 |             exec:
 66 |               command:
 67 |               - /bin/bash
 68 |               - -c
 69 |               - scripts/db-backup.sh prestop; mongod --shutdown
 70 |       volumes:
 71 |       {{ if .Values.Remote.Enabled }}
 72 |       - name: conf
 73 |         secret:
 74 |           secretName: kubeconfig
 75 |           defaultMode: 256
 76 |       {{ end }}
 77 |       {{ if .Values.NewLocalPVC.Enabled }}
 78 |       - name: vol-1
 79 |         persistentVolumeClaim:
 80 |           claimName: {{ .Values.NewLocalPVC.Name }}
 81 |       {{ end }}
 82 |       {{ if .Values.ExistingLocalPVC.Enabled }}
 83 |       - name: vol-1
 84 |         persistentVolumeClaim:
 85 |           claimName: {{ .Values.ExistingLocalPVC.Name }}
 86 |       {{ end }}
 87 | ---
 88 | kind: Service
 89 | apiVersion: v1
 90 | metadata:
 91 |   name: nextflow-api-ip
 92 |   labels:
 93 |     app: nextflow-api-ip
 94 | spec:
 95 |   type: ClusterIP
 96 |   selector:
 97 |     app: nextflow-api-ip
 98 |   ports:
 99 |   - port: 8080
100 |     targetPort: 8080
101 | ---
102 | {{ if .Values.LoadBalancer.Enabled }}
103 | apiVersion: v1
104 | kind: Service
105 | metadata:
106 |   name: nextflow-api
107 |   labels:
108 |     app: nextflow-api
109 |     chart: {{ template "nextflow-api.chart" . }}
110 |     release: {{ .Release.Name }}
111 | spec:
112 |   ports:
113 |   - port: 8080
114 |     targetPort: 8080
115 |   type: LoadBalancer
116 |   selector:
117 |     app: nextflow-api
118 | {{ end }}
119 | ---
120 | {{ if .Values.Ingress.Enabled }}
121 | apiVersion: v1
122 | kind: Service
123 | metadata:
124 |   name: nextflow-api
125 |   labels:
126 |     app: nextflow-api
127 |     chart: {{ template "nextflow-api.chart" . }}
128 |     release: {{ .Release.Name }}
129 | spec:
130 |   ports:
131 |   - port: 8080
132 |     targetPort: 8080
133 |   selector:
134 |     app: nextflow-api
135 | ---
136 | apiVersion: extensions/v1beta1
137 | kind: Ingress
138 | metadata:
139 |   name: nextflow-api
140 |   annotations:
141 |     traefik.ingress.kubernetes.io/router.tls: ""
142 |     kubernetes.io/ingress.class: {{ .Values.Ingress.Class }}
143 | spec:
144 |   rules:
145 |   - host: {{ .Values.Ingress.Host }}
146 |     http:
147 |       paths:
148 |       - path: /
149 |         backend:
150 |           serviceName: nextflow-api
151 |           servicePort: 8080
152 | {{ end }}
153 | ---
154 | {{ if .Values.NodePorts.Enabled }}
155 | apiVersion: v1
156 | kind: Service
157 | metadata:  
158 |   name: nextflow-api
159 | spec:
160 |   selector:    
161 |     app: nextflow-api
162 |   type: NodePort
163 |   ports:  
164 |   - name: nextflow-api
165 |     port: 8080
166 |     targetPort: 8080
167 |     nodePort: {{ .Values.NodePorts.NodePort }}
168 | {{ end }}
169 | 


--------------------------------------------------------------------------------
/helm/templates/pvc.yaml:
--------------------------------------------------------------------------------
 1 | {{ if .Values.NewLocalPVC.Enabled }}
 2 | kind: PersistentVolumeClaim
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: {{ .Values.NewLocalPVC.Name }}
 6 |   labels:
 7 |     app: {{ template "nextflow-api.name" . }}
 8 |     chart: {{ template "nextflow-api.chart" . }}
 9 |     release: {{ .Release.Name }}
10 | spec:
11 |   accessModes:
12 |     - ReadWriteMany
13 |   storageClassName: {{ .Values.NewLocalPVC.StorageClass }}
14 |   resources:
15 |     requests:
16 |       storage: {{ .Values.NewLocalPVC.Size }}
17 | {{ end }}
18 | 


--------------------------------------------------------------------------------
/helm/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for nextflow-api.
 2 | # Declare variables to be passed into your templates.
 3 | 
 4 | # PVC
 5 | NewLocalPVC:
 6 |   # If true, create new PVC on local cluster.
 7 |   Enabled: false
 8 |   Name: nextflow-api-pvc
 9 |   StorageClass: nfs
10 |   Size: 20Gi
11 | 
12 | ExistingLocalPVC:
13 |   # If true, use existing PVC on local cluster.
14 |   Enabled: true
15 |   Name: scidas-dev
16 | 
17 | Remote:
18 |   # If true, use PVC/compute of remote cluster.
19 |   # Otherwise, PVC/compute of local cluster will be used.
20 |   Enabled: false
21 |   Context: nautilus
22 |   PVC: deepgtex-prp
23 | 
24 | # Web server deployment settings
25 | WebServer:
26 |   # Docker image, change to your nextflow-api image if needed
27 |   Image: bentsherman/nextflow-api
28 |   # Database Backend - options are "file" and "mongo"
29 |   Backend: "mongo"
30 |   # Number of containers
31 |   Replicas: 1
32 |   # Resource requests and limits per container
33 |   Resources:
34 |     Requests:
35 |       CPU: 3
36 |       Memory: 10Gi
37 |     Limits:
38 |       CPU: 3
39 |       Memory: 10Gi
40 | 
41 | # Load Balancer Settings
42 | LoadBalancer:
43 |   Enabled: false
44 | 
45 | # Ingress control settings
46 | Ingress:
47 |   # If true, use Ingress control.
48 |   # Otherwise, generic LoadBalancer networking will be used,
49 |   # and the other settings in this section will be ignored.
50 |   Enabled: true
51 |   # The subdomain to associate with this service.
52 |   Host: nextflow-api-dev.nautilus.optiputer.net
53 |   # The class of the ingress controller to use.
54 |   # For SLATE this should be 'slate'.
55 |   Class: traefik
56 | 
57 | # NodePort Settings
58 | NodePorts:
59 |   Enabled: false
60 |   NodePort: 31376
61 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | absl-py==0.14.0
 2 | astor==0.8.1
 3 | astunparse==1.6.3
 4 | cached-property==1.5.2
 5 | cachetools==4.2.4
 6 | certifi==2021.5.30
 7 | charset-normalizer==2.0.6
 8 | clang==5.0
 9 | cycler==0.10.0
10 | dill==0.3.2
11 | flatbuffers==1.12
12 | forestci==0.5.1
13 | gast==0.4.0
14 | google-auth==1.35.0
15 | google-auth-oauthlib==0.4.6
16 | google-pasta==0.2.0
17 | grpcio==1.41.0
18 | h5py==3.1.0
19 | idna==3.2
20 | importlib-metadata==4.8.1
21 | joblib==1.0.1
22 | keras==2.6.0
23 | Keras-Applications==1.0.8
24 | Keras-Preprocessing==1.1.2
25 | kiwisolver==1.3.2
26 | Markdown==3.3.4
27 | matplotlib==3.4.3
28 | motor==2.1.0
29 | numpy==1.19.5
30 | oauthlib==3.1.1
31 | opt-einsum==3.3.0
32 | pandas==1.3.3
33 | Pillow==8.3.2
34 | protobuf==3.18.0
35 | pyasn1==0.4.8
36 | pyasn1-modules==0.2.8
37 | pymongo==3.12.0
38 | pyparsing==2.4.7
39 | python-dateutil==2.8.2
40 | pytz==2021.1
41 | requests==2.26.0
42 | requests-oauthlib==1.3.0
43 | rsa==4.7.2
44 | scikit-learn==1.0
45 | scipy==1.7.1
46 | seaborn==0.11.2
47 | six==1.15.0
48 | tensorboard==2.6.0
49 | tensorboard-data-server==0.6.1
50 | tensorboard-plugin-wit==1.8.0
51 | tensorflow==2.6.0
52 | tensorflow-estimator==2.6.0
53 | termcolor==1.1.0
54 | threadpoolctl==2.2.0
55 | tornado==6.0.3
56 | typing-extensions==3.7.4.3
57 | urllib3==1.26.7
58 | Werkzeug==2.0.1
59 | wrapt==1.12.1
60 | zipp==3.5.0
61 | 


--------------------------------------------------------------------------------
/scripts/convert-json-pkl.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import argparse
 4 | import json
 5 | import pickle
 6 | 
 7 | 
 8 | 
 9 | def main():
10 | 	# parse command-line arguments
11 | 	parser = argparse.ArgumentParser()
12 | 	parser.add_argument('infile', help='input file')
13 | 	parser.add_argument('outfile', help='output file')
14 | 
15 | 	args = parser.parse_args()
16 | 
17 | 	# load input file
18 | 	infile = open(args.infile, 'r')
19 | 	obj = json.load(infile)
20 | 
21 | 	# save output file
22 | 	outfile = open(args.outfile, 'wb')
23 | 	pickle.dump(obj, outfile)
24 | 
25 | 
26 | 
27 | if __name__ == '__main__':
28 | 	main()


--------------------------------------------------------------------------------
/scripts/db-backup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Backup a database to an archive.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 1 ]]; then
 6 | 	>&2 echo "usage: $0 <type>"
 7 | 	exit 1
 8 | fi
 9 | 
10 | DATABASE="nextflow_api"
11 | DUMP="dump"
12 | BACKUPS="/workspace/_backups"
13 | TYPE="$1"
14 | 
15 | # remove existing dump directory
16 | rm -rf ${DUMP}
17 | 
18 | # dump database to dump directory
19 | mongodump -d ${DATABASE} -o ${DUMP}
20 | 
21 | # create archive of dump directory
22 | tar -czvf $(date +"${BACKUPS}/${TYPE}_%Y_%m_%d.tar.gz") ${DUMP}
23 | 
24 | # remove older archives of the same type
25 | NUM_BACKUPS=$(ls ${BACKUPS}/${TYPE}_* | wc -l)
26 | MAX_BACKUPS=10
27 | 
28 | if [[ ${NUM_BACKUPS} > ${MAX_BACKUPS} ]]; then
29 | 	rm -f "$(ls ${BACKUPS}/${TYPE}_* | head -n 1)"
30 | fi
31 | 


--------------------------------------------------------------------------------
/scripts/db-restore.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Restore a database from an archive
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 1 ]]; then
 6 | 	>&2 echo "usage: $0 <archive>"
 7 | 	exit 1
 8 | fi
 9 | 
10 | ARCHIVE="$1"
11 | DUMP="dump"
12 | DATABASE="nextflow_api"
13 | 
14 | # remove existing dump directory
15 | rm -rf ${DUMP}
16 | 
17 | # extract archive to dump directory
18 | tar -xvf ${ARCHIVE}
19 | 
20 | # restore database from archive
21 | mongorestore --drop --nsInclude ${DATABASE}.* --noIndexRestore ${DUMP}
22 | 


--------------------------------------------------------------------------------
/scripts/db-startup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Startup script for kubernetes deployment.
 3 | 
 4 | # start mongodb service
 5 | mkdir -p /data/db
 6 | mkdir -p /var/log/mongodb
 7 | 
 8 | mongod \
 9 |     --fork \
10 |     --dbpath /data/db \
11 |     --logpath /var/log/mongodb/mongod.log \
12 |     --bind_ip 0.0.0.0
13 | 
14 | # initialize backups directory
15 | BACKUPS="/workspace/_backups"
16 | 
17 | mkdir -p ${BACKUPS}
18 | 
19 | # restore database backup if present
20 | LATEST=$(ls ${BACKUPS} | tail -n 1)
21 | 
22 | if [[ ! -z ${LATEST} ]]; then
23 |     scripts/db-restore.sh "${BACKUPS}/${LATEST}"
24 | fi
25 | 
26 | # create cronjob to backup database daily
27 | echo "00 06 * * * ${PWD}/scripts/db-backup.sh daily" | crontab -
28 | 


--------------------------------------------------------------------------------
/scripts/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | IMAGE_NAME="bentsherman/nextflow-api"
 4 | 
 5 | set -ex
 6 | 
 7 | # remove data files
 8 | rm -rf _models _trace _workflows .nextflow* db.json db.pkl
 9 | 
10 | # build docker image
11 | docker build -t ${IMAGE_NAME} .
12 | docker push ${IMAGE_NAME}
13 | 
14 | # deploy helm chart to kubernetes cluster
15 | helm uninstall nextflow-api
16 | helm install nextflow-api ./helm
17 | 


--------------------------------------------------------------------------------
/scripts/kube-cancel.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Remove all pods associated with a given workflow run.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 1 ]]; then
 6 | 	echo "usage: $0 <run-name>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | RUN_NAME="$1"
11 | 
12 | # query list of pods
13 | PODS=`kubectl get pods --output custom-columns=NAME:.metadata.name,RUN:.metadata.labels.runName \
14 | 	| grep ${RUN_NAME} \
15 | 	| awk '{ print $1 }'`
16 | 
17 | # delete pods
18 | if [[ ! -z ${PODS} ]]; then
19 | 	kubectl delete pods ${PODS}
20 | fi
21 | 


--------------------------------------------------------------------------------
/scripts/kube-config.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Configure kubectl to use a given context on startup.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 1 ]]; then
 6 | 	echo "usage: $0 <kube-context"
 7 | 	exit -1
 8 | fi
 9 | 
10 | KUBE_CONTEXT="$1"
11 | 
12 | # configure kubectl context
13 | cp -R /etc/.kube /root
14 | kubectl config --kubeconfig=/root/.kube/config use-context ${KUBE_CONTEXT}


--------------------------------------------------------------------------------
/scripts/kube-run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Run a nextflow pipeline on a Kubernetes cluster.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# -lt 3 ]]; then
 6 | 	echo "usage: $0 <pvc-name> <id> <pipeline> [options]"
 7 | 	exit -1
 8 | fi
 9 | 
10 | PVC_NAME="$1"
11 | ID="$2"
12 | PIPELINE="$3"
13 | 
14 | shift 3
15 | OPTIONS="$*"
16 | 
17 | POD_NAME="nextflow-api-${ID}"
18 | SPEC_FILE="${POD_NAME}.yaml"
19 | PVC_PATH="/workspace"
20 | 
21 | # write pod spec to file
22 | cat > ${SPEC_FILE} <<EOF
23 | apiVersion: v1
24 | kind: Pod
25 | metadata:
26 |   labels:
27 |     app: nextflow
28 |   name: ${POD_NAME}
29 | spec:
30 |   containers:
31 |   - name: ${POD_NAME}
32 |     image: nextflow/nextflow:${NXF_VER}
33 |     imagePullPolicy: IfNotPresent
34 |     env:
35 |     - name: NXF_WORK
36 |       value: ${PVC_PATH}/_workflows/${ID}/work
37 |     - name: NXF_ASSETS
38 |       value: ${PVC_PATH}/projects
39 |     - name: NXF_EXECUTOR
40 |       value: k8s
41 |     - name: NXF_ANSI_LOG
42 |       value: "false"
43 |     command:
44 |     - /bin/bash
45 |     - -c
46 |     - cd ${PVC_PATH}/_workflows/${ID}; nextflow run ${PIPELINE} ${OPTIONS}
47 |     resources:
48 |       requests:
49 |         cpu: 1
50 |         memory: 4Gi
51 |     volumeMounts:
52 |     - name: vol-1
53 |       mountPath: ${PVC_PATH}
54 |   restartPolicy: Never
55 |   volumes:
56 |   - name: vol-1
57 |     persistentVolumeClaim:
58 |       claimName: ${PVC_NAME}
59 | EOF
60 | 
61 | # create pod
62 | kubectl create -f ${SPEC_FILE}
63 | 
64 | # wait for pod to initialize
65 | POD_STATUS=""
66 | 
67 | while [[ ${POD_STATUS} != "Running" ]]; do
68 | 	sleep 2
69 | 	POD_STATUS="$(kubectl get pod --no-headers --output jsonpath={.status.phase} ${POD_NAME})"
70 | done
71 | 
72 | # stream output log
73 | kubectl logs -f ${POD_NAME}
74 | 
75 | # delete pod
76 | kubectl delete -f ${SPEC_FILE}
77 | 
78 | # cleanup
79 | rm -f ${SPEC_FILE}
80 | 


--------------------------------------------------------------------------------
/scripts/kube-save.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Collect output data into a single archive.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# != 2 ]]; then
 6 | 	echo "usage: $0 <id> <path>"
 7 | 	exit -1
 8 | fi
 9 | 
10 | ID="$1"
11 | SRC_PATH="$2"
12 | DST_DIRNAME="$(dirname ${SRC_PATH})"
13 | 
14 | # replace any links with the original files
15 | for f in $(find ${SRC_PATH} -type l); do
16 | 	cp --remove-destination $(readlink $f) $f
17 | done
18 | 
19 | # copy log file into output folder
20 | cp ${DST_DIRNAME}/.workflow.log ${SRC_PATH}/workflow.log
21 | 
22 | # remove old nextflow reports (except for logs)
23 | rm -f ${SRC_PATH}/reports/report.html.*
24 | rm -f ${SRC_PATH}/reports/timeline.html.*
25 | rm -f ${SRC_PATH}/reports/trace.txt.*
26 | 
27 | # create archive of output data
28 | cd ${DST_DIRNAME}
29 | 
30 | tar -czf "${ID}-output.tar.gz" $(basename ${SRC_PATH})/*
31 | 


--------------------------------------------------------------------------------
/scripts/startup-local.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Startup script for local environment.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# == 1 ]]; then
 6 |     BACKEND="$1"
 7 | else
 8 | 	echo "usage: $0 <backend>"
 9 | 	exit -1
10 | fi
11 | 
12 | # initialize environment
13 | source ${HOME}/anaconda3/etc/profile.d/conda.sh
14 | conda activate nextflow-api
15 | 
16 | # start mongodb server
17 | if [[ ${BACKEND} == "mongo" ]]; then
18 |     sudo service mongodb start
19 | fi
20 | 
21 | # start web server
22 | export NXF_EXECUTOR="local"
23 | export TF_CPP_MIN_LOG_LEVEL="3"
24 | 
25 | bin/server.py --backend=${BACKEND}
26 | 


--------------------------------------------------------------------------------
/scripts/startup-nautilus.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Startup script for Nautilus/Kubernetes environment.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# == 1 ]]; then
 6 |     BACKEND="$1"
 7 | elif [[ $# == 2 ]]; then
 8 |     BACKEND="$1"
 9 |     KUBE_CONTEXT="$2"
10 | else
11 | 	echo "usage: $0 <backend> [kube-context]"
12 | 	exit -1
13 | fi
14 | 
15 | # start mongodb server
16 | if [[ ${BACKEND} == "mongo" ]]; then
17 |     scripts/db-startup.sh
18 | fi
19 | 
20 | # configure kubectl context if specified
21 | if [[ ! -z ${KUBE_CONTEXT} ]]; then
22 |     scripts/kube-config.sh ${KUBE_CONTEXT}
23 | fi
24 | 
25 | # start web server
26 | export TF_CPP_MIN_LOG_LEVEL="3"
27 | 
28 | bin/server.py --backend=${BACKEND}
29 | 


--------------------------------------------------------------------------------
/scripts/startup-palmetto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Startup script for Palmetto environment.
 3 | 
 4 | # parse command-line arguments
 5 | if [[ $# == 1 ]]; then
 6 |     BACKEND="$1"
 7 | else
 8 | 	echo "usage: $0 <backend>"
 9 | 	exit -1
10 | fi
11 | 
12 | # load modules
13 | module purge
14 | module load anaconda3/5.1.0-gcc/8.3.1
15 | module load nextflow/20.07.1
16 | 
17 | # initialize environment
18 | source activate nextflow-api
19 | 
20 | # start mongodb server
21 | if [[ ${BACKEND} == "mongo" ]]; then
22 |     killall mongod
23 | 
24 |     mongod \
25 |         --fork \
26 |         --dbpath /mongo/${USER}/data \
27 |         --logpath /mongo/${USER}/mongod.log \
28 |         --bind_ip_all
29 | fi
30 | 
31 | # start web server
32 | export NXF_EXECUTOR="pbspro"
33 | export TF_CPP_MIN_LOG_LEVEL="3"
34 | 
35 | bin/server.py --backend=${BACKEND}
36 | 


--------------------------------------------------------------------------------