├── .ci
├── cvmfs-fuse.conf
├── get-bundle-url.py
├── jenkins.sh
└── repos.conf
├── .gitignore
├── LICENSE
├── README.md
├── ansible
├── .pass
├── ansible.cfg
├── files
│ └── idc
│ │ ├── bootstrap.sh
│ │ ├── data_manager_conf.xml
│ │ └── workflow_schedulers_conf.xml
├── group_vars
│ ├── idc_builders.yaml
│ └── js2.yaml
├── inventory.yaml
├── playbook-launch.yaml
├── playbook-teardown.yaml
├── requirements.txt
├── requirements.yaml
├── roles
│ └── openstack
│ │ └── tasks
│ │ ├── clean.yml
│ │ ├── main.yml
│ │ ├── secrets.yml
│ │ └── spawn.yml
└── templates
│ ├── idc
│ ├── object_store_conf.xml.j2
│ └── tpv
│ │ └── idc.yaml.j2
│ └── nginx
│ └── idc-build.j2
├── config
└── tool_data_table_conf.xml
├── cvmfs_current_managed
├── cvmfs_data_managers.yml
└── cvmfs_managed_genomes.yml
├── data_managers.yml
├── genomes.yml
├── run_builder.sh
└── scripts
├── cleanup.sh
├── job_conf.xml
├── make_dm_genomes.py
└── make_fetch.py
/.ci/cvmfs-fuse.conf:
--------------------------------------------------------------------------------
1 | CVMFS_CACHE_BASE=${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache
2 | CVMFS_RELOAD_SOCKETS=${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache
3 | CVMFS_USYSLOG=${WORKSPACE}/${BUILD_NUMBER}/cvmfs.log
4 | CVMFS_CLAIM_OWNERSHIP=yes
5 | CVMFS_SERVER_URL="http://cvmfs1-psu0.galaxyproject.org/cvmfs/@fqrn@;http://cvmfs1-iu0.galaxyproject.org/cvmfs/@fqrn@;http://cvmfs1-tacc0.galaxyproject.org/cvmfs/@fqrn@"
6 | CVMFS_KEYS_DIR=/etc/cvmfs/keys/galaxyproject.org
7 | CVMFS_HTTP_PROXY=DIRECT
8 | # this is critical for release managers (and thus for our faux-release-manager) because ext4's xattr space is too small
9 | # to copy the cvmfs client's xattrs, yielding ENOSPC (No space left on device) for any copy-on-writes
10 | CVMFS_HIDE_MAGIC_XATTRS=yes
11 |
--------------------------------------------------------------------------------
/.ci/get-bundle-url.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Print the download URL of the newest Data Manager bundle dataset.

Looks up a history by name on a Galaxy server, finds the most recently
created dataset with the ``data_manager_json`` extension, prints that
dataset's display URL, and optionally writes a record file describing
what was found.
"""
import argparse
import os
import sys

from bioblend.galaxy import GalaxyInstance

# Galaxy datatype extension produced by data managers run in bundle mode
EXT = 'data_manager_json'

parser = argparse.ArgumentParser(description="")
parser.add_argument(
    "-g", "--galaxy-url", default="http://localhost:8080", help="The Galaxy server URL"
)
parser.add_argument(
    "-u", "--galaxy-user", default="idc@galaxyproject.org", help="Galaxy user email"
)
parser.add_argument(
    "-p", "--galaxy-password", help="Galaxy user password (or set $IDC_USER_PASS)"
)
parser.add_argument(
    "-a", "--galaxy-api-key", help="Galaxy API key (or set $EPHEMERIS_API_KEY)"
)
parser.add_argument(
    "-n", "--history-name", default="Data Manager History (automatically created)", help="History name"
)
parser.add_argument(
    "-r", "--record-file", help="Record file"
)
args = parser.parse_args()

# CLI credentials take precedence over the environment; an API key takes
# precedence over email/password
api_key = args.galaxy_api_key or os.environ.get("EPHEMERIS_API_KEY")
password = args.galaxy_password or os.environ.get("IDC_USER_PASS")
if api_key:
    auth_kwargs = {"key": api_key}
elif password:
    auth_kwargs = {"email": args.galaxy_user, "password": password}
else:
    raise RuntimeError("No Galaxy credentials supplied")

gi = GalaxyInstance(url=args.galaxy_url, **auth_kwargs)

histories = gi.histories.get_histories(name=args.history_name, deleted=False)
if not histories:
    sys.exit(f"No history named {args.history_name!r} found on {args.galaxy_url}")
history = histories[0]
history_id = history['id']
datasets = gi.datasets.get_datasets(
    history_id=history_id, extension=EXT, order="create_time-dsc"
)
if not datasets:
    sys.exit(f"No '{EXT}' datasets found in history {history_id}")
dataset_id = datasets[0]['id']

bundle_url = f"{args.galaxy_url}/api/datasets/{dataset_id}/display?to_ext={EXT}"

if args.record_file:
    # history['url'] is server-relative (leading '/'); normalize the join so
    # the recorded URL does not contain a double slash
    history_url = f"{args.galaxy_url.rstrip('/')}/{history['url'].lstrip('/')}"
    with open(args.record_file, "w") as fh:
        fh.write(f"galaxy_url: {args.galaxy_url}\n")
        fh.write(f"history_id: {history_id}\n")
        fh.write(f"history_url: {history_url}\n")
        fh.write(f"bundle_dataset_id: {dataset_id}\n")
        fh.write(f"bundle_dataset_url: {bundle_url}\n")

print(bundle_url)
--------------------------------------------------------------------------------
/.ci/jenkins.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# IDC build/import driver (run by Jenkins): builds data manager bundles on a
# transient "build" Galaxy, then imports them into a CVMFS repository.
set -euo pipefail

# Set this variable to 'true' to publish on successful installation
: ${PUBLISH:=false}

BUILD_GALAXY_URL="http://idc-build"
PUBLISH_GALAXY_URL="https://usegalaxy.org"
SSH_MASTER_SOCKET_DIR="${HOME}/.cache/idc"
MAIN_BRANCH='main'

# Set to 'centos:...' or 'rockylinux:...' and set GALAXY_GIT_* or GALAXY_SERVER_DIR below to use a clone
IMPORT_DOCKER_IMAGE='rockylinux:8'
# Disable if using a locally built image e.g. for debugging
IMPORT_DOCKER_IMAGE_PULL=true

#GALAXY_TEMPLATE_DB_URL='https://raw.githubusercontent.com/davebx/galaxyproject-sqlite/master/20.01.sqlite'
#GALAXY_TEMPLATE_DB="${GALAXY_TEMPLATE_DB_URL##*/}"
# Unset to use create_db.py, which is fast now that it doesn't migrate new DBs
GALAXY_TEMPLATE_DB_URL=
GALAXY_TEMPLATE_DB='galaxy.sqlite'

# pip requirement specs for the build/import tooling venvs
EPHEMERIS="git+https://github.com/mvdbeek/ephemeris.git@dm_parameters#egg_name=ephemeris"
GALAXY_MAINTENANCE_SCRIPTS="git+https://github.com/mvdbeek/galaxy-maintenance-scripts.git@avoid_galaxy_app#egg_name=galaxy-maintenance-scripts"

# Should be set by Jenkins, so the default here is for development
: ${GIT_COMMIT:=$(git rev-parse HEAD)}

# Set to true to perform everything on the Jenkins worker and copy results to the Stratum 0 for publish, instead of
# performing everything directly on the Stratum 0. Requires preinstallation/preconfiguration of CVMFS and for
# fuse-overlayfs to be installed on Jenkins workers.
USE_LOCAL_OVERLAYFS=false

# Set to true to run the importer in a docker container
USE_DOCKER="$USE_LOCAL_OVERLAYFS"

REMOTE_PYTHON=/opt/rh/rh-python38/root/usr/bin/python3
REMOTE_WORKDIR_PARENT=/srv/idc

# $EPHEMERIS_API_KEY and $IDC_VAULT_PASS should be set in the environment

#
# Development/debug options
#

#
# Ensure that everything is defined for set -u
#

DM_STAGE=0
TOOL_YAMLS=()
REPO_USER=
REPO_STRATUM0=
SSH_MASTER_SOCKET=
WORKDIR=
REMOTE_WORKDIR=
USER_UID="$(id -u)"
USER_GID="$(id -g)"
OVERLAYFS_UPPER=
OVERLAYFS_LOWER=
OVERLAYFS_WORK=
OVERLAYFS_MOUNT=
EPHEMERIS_BIN=
GALAXY_MAINTENANCE_SCRIPTS_BIN=

# State flags consulted by the exit trap so it tears down only what started
SSH_MASTER_UP=false
CVMFS_TRANSACTION_UP=false
IMPORT_CONTAINER_UP=false
LOCAL_CVMFS_MOUNTED=false
LOCAL_OVERLAYFS_MOUNTED=false
BUILD_GALAXY_UP=false
72 |
73 |
function trap_handler() {
    # Cleanup on exit/error: tear down, in dependency order, only the
    # resources whose state flags indicate they were actually started.
    { set +x; } 2>/dev/null
    # return to original dir
    while popd 2>/dev/null; do :; done || true
    $IMPORT_CONTAINER_UP && stop_import_container
    clean_preconfigured_container
    $LOCAL_CVMFS_MOUNTED && unmount_overlay
    # $LOCAL_OVERLAYFS_MOUNTED does not need to be checked here since if it's true, $LOCAL_CVMFS_MOUNTED must be true
    $CVMFS_TRANSACTION_UP && abort_transaction
    $BUILD_GALAXY_UP && stop_build_galaxy
    clean_workspace
    # NOTE(review): assumes $WORKSPACE is set (Jenkins provides it); under
    # 'set -u' an unset WORKSPACE would error here — confirm for local runs
    [ -n "$WORKSPACE" ] && log_exec rm -rf "$WORKSPACE"
    $SSH_MASTER_UP && [ -n "$REMOTE_WORKDIR" ] && exec_on rm -rf "$REMOTE_WORKDIR"
    $SSH_MASTER_UP && stop_ssh_control
    return 0
}
trap "trap_handler" SIGTERM SIGINT ERR EXIT
91 |
92 |
function log() {
    # Informational message; green when stdin is a terminal.
    if [ -t 0 ]; then
        echo -e '\033[1;32m#' "$@" '\033[0m'
    else
        echo '#' "$@"
    fi
}
96 |
97 |
function log_error() {
    # Error message; red when stdin is a terminal.
    if [ -t 0 ]; then
        echo -e '\033[0;31mERROR:' "$@" '\033[0m'
    else
        echo 'ERROR:' "$@"
    fi
}
101 |
102 |
function log_debug() {
    # Debug-level message, prefixed with '####'.
    local prefix='####'
    echo "$prefix" "$@"
}
106 |
107 |
function log_exec() {
    # Trace-and-run a command via 'set -x', preserving its exit status.
    local rc
    if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then
        # Local mode: 'eval' so commands handed over as single quoted
        # strings (as exec_on does for the SSH path) still word-split here
        set -x
        eval "$@"
    else
        set -x
        "$@"
    fi
    # Capture rc and silence the 'set +x' trace line in one redirected group
    { rc=$?; set +x; } 2>/dev/null
    return $rc
}
120 |
121 |
function log_exit_error() {
    # Log an error message and terminate the script with a failure status.
    log_error "$@"
    exit 1
}
126 |
127 |
function log_exit() {
    # Print a message and terminate the script successfully.
    echo "$@"
    exit 0
}
132 |
133 |
function exec_on() {
    # Run a command either locally (local-overlayfs mode, before the SSH
    # control connection exists) or on the Stratum 0 via the SSH master.
    if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then
        log_exec "$@"
    else
        log_exec ssh -S "$SSH_MASTER_SOCKET" -l "$REPO_USER" "$REPO_STRATUM0" -- "$@"
    fi
}
141 |
142 |
function copy_to() {
    # Copy a local file into the working directory: $WORKDIR when operating
    # locally, otherwise $REMOTE_WORKDIR on the Stratum 0 via scp.
    local file="$1"
    if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then
        # BUGFIX: was "${file##*}", which strips the whole string (missing
        # '/'), silently copying to "${WORKDIR}/"; use the basename to match
        # the scp branch below.
        log_exec cp "$file" "${WORKDIR}/${file##*/}"
    else
        log_exec scp -o "ControlPath=$SSH_MASTER_SOCKET" "$file" "${REPO_USER}@${REPO_STRATUM0}:${REMOTE_WORKDIR}/${file##*/}"
    fi
}
151 |
152 |
function check_bot_command() {
    # Inspect the GitHub PR bot comment (if any); a comment beginning with
    # '@galaxybot deploy' enables publishing. Exit early when no publish is
    # requested.
    log 'Checking for Github PR Bot commands'
    local comment="${ghprbCommentBody:-UNSET}"
    log_debug "Value of \$ghprbCommentBody is: ${comment}"
    if [[ "$comment" == "@galaxybot deploy"* ]]; then
        PUBLISH=true
    fi
    if ! $PUBLISH; then
        log "Publish not requested, exiting"
        exit 0
    fi
    log "Publish requested; running build and import"
}
168 |
169 |
function load_repo_configs() {
    # Source the repo maps (REPOS, REPO_STRATUM0S, REPO_USERS) from .ci/repos.conf.
    log 'Loading repository configs'
    . ./.ci/repos.conf
}
174 |
175 |
function detect_changes() {
    # Resolve the toolset dir name to its CVMFS repository name via $REPOS.
    # NOTE(review): despite the name, no change detection happens here — the
    # 'idc' toolset is hardcoded.
    REPO=idc

    log "Getting repo for: ${REPO}"
    REPO="${REPOS[$REPO]}"
    declare -p REPO
}
183 |
184 |
function set_repo_vars() {
    # Derive per-repo globals (user, Stratum 0 host, container name) and the
    # OverlayFS paths, which differ between local and on-Stratum-0 modes.
    REPO_USER="${REPO_USERS[$REPO]}"
    REPO_STRATUM0="${REPO_STRATUM0S[$REPO]}"
    CONTAINER_NAME="idc-${REPO_USER}-${BUILD_NUMBER}"
    if $USE_LOCAL_OVERLAYFS; then
        # Scratch dirs in the Jenkins workspace for a locally mounted
        # cvmfs2 (lower) + fuse-overlayfs (upper/work/mount) stack
        OVERLAYFS_LOWER="${WORKSPACE}/${BUILD_NUMBER}/lower"
        OVERLAYFS_UPPER="${WORKSPACE}/${BUILD_NUMBER}/upper"
        OVERLAYFS_WORK="${WORKSPACE}/${BUILD_NUMBER}/work"
        OVERLAYFS_MOUNT="${WORKSPACE}/${BUILD_NUMBER}/mount"
        CVMFS_CACHE="${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache"
    else
        # Standard CVMFS server-side spool paths on the Stratum 0
        OVERLAYFS_UPPER="/var/spool/cvmfs/${REPO}/scratch/current"
        OVERLAYFS_LOWER="/var/spool/cvmfs/${REPO}/rdonly"
        OVERLAYFS_MOUNT="/cvmfs/${REPO}"
    fi
}
201 |
202 |
function setup_ansible() {
    # Create the Ansible venv and install the role/collection requirements
    # used by the launch/teardown playbooks.
    log "Setting up Ansible"
    log_exec python3 -m venv ansible-venv
    . ./ansible-venv/bin/activate
    log_exec pip install --upgrade pip wheel
    pushd ansible
    log_exec pip install -r requirements.txt
    log_exec ansible-galaxy role install -p roles -r requirements.yaml
    log_exec ansible-galaxy collection install -p collections -r requirements.yaml
    popd
    deactivate
}
215 |
216 |
function setup_ephemeris() {
    # Sets global $EPHEMERIS_BIN
    # Create a local venv and install Ephemeris (spec from $EPHEMERIS,
    # falling back to the plain 'ephemeris' package).
    EPHEMERIS_BIN="$(pwd)/ephemeris/bin"
    log "Setting up Ephemeris"
    log_exec python3 -m venv ephemeris
    log_exec "${EPHEMERIS_BIN}/pip" install --upgrade pip wheel
    log_exec "${EPHEMERIS_BIN}/pip" install --index-url https://wheels.galaxyproject.org/simple/ \
        --extra-index-url https://pypi.org/simple/ "${EPHEMERIS:=ephemeris}"
}
226 |
227 |
function setup_remote_ephemeris() {
    # Sets global $EPHEMERIS_BIN
    # Create the Ephemeris venv on the Stratum 0, inside the remote workdir.
    EPHEMERIS_BIN="${REMOTE_WORKDIR}/ephemeris/bin"
    log "Setting up remote Ephemeris"
    exec_on "$REMOTE_PYTHON" -m venv "${REMOTE_WORKDIR}/ephemeris"
    exec_on "${EPHEMERIS_BIN}/pip" install --upgrade pip wheel
    # urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'OpenSSL 1.0.2k-fips 26 Jan 2017'. See: https://github.com/urllib3/urllib3/issues/2168
    exec_on "${EPHEMERIS_BIN}/pip" install --index-url https://wheels.galaxyproject.org/simple/ \
        --extra-index-url https://pypi.org/simple/ "${BIOBLEND:=bioblend}" "${EPHEMERIS:=ephemeris}" "'urllib3<2'"
}
238 |
239 |
function setup_galaxy_maintenance_scripts() {
    # Sets global $GALAXY_MAINTENANCE_SCRIPTS_BIN (the original comment said
    # $GALAXY_MAINTENANCE_SCRIPTS, which is actually the pip requirement
    # spec defined at the top of this script).
    # $1: parent dir for the venv (default '.'); $2: python interpreter.
    local venv="${1:-.}/galaxy-maintenance-scripts"
    local python="${2:-python3}"
    GALAXY_MAINTENANCE_SCRIPTS_BIN="${venv}/bin"
    log "Setting up Galaxy Maintenance Scripts"
    exec_on "$python" -m venv "$venv"
    exec_on "${venv}/bin/pip" install --upgrade pip wheel
    exec_on "${venv}/bin/pip" install --index-url https://wheels.galaxyproject.org/simple/ \
        --extra-index-url https://pypi.org/simple/ "$GALAXY_MAINTENANCE_SCRIPTS" "'urllib3<2'"
}
251 |
252 |
function verify_cvmfs_revision() {
    # Ensure the locally mounted CVMFS client sees the same repository
    # revision that the Stratum 0 publishes; abort the build otherwise.
    log "Verifying that CVMFS Client and Stratum 0 are in sync"
    local cvmfs_io_sock="${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache/${REPO}/cvmfs_io.${REPO}"
    local stratum0_published_url="http://${REPO_STRATUM0}/cvmfs/${REPO}/.cvmfspublished"
    local client_rev=$(cvmfs_talk -p "$cvmfs_io_sock" revision)
    # .cvmfspublished: keep only the header (before the '--' separator); the
    # revision is on the line starting with 'S'
    local stratum0_rev=$(curl -s "$stratum0_published_url" | awk -F '^--$' '{print $1} NF>1{exit}' | grep '^S' | sed 's/^S//')
    if [ -z "$client_rev" ]; then
        log_exit_error "Failed to detect client revision"
    elif [ -z "$stratum0_rev" ]; then
        log_exit_error "Failed to detect Stratum 0 revision"
    elif [ "$client_rev" -ne "$stratum0_rev" ]; then
        log_exit_error "Importer client revision '${client_rev}' does not match Stratum 0 revision '${stratum0_rev}'"
    fi

    log "${REPO} is revision ${client_rev}"
}
269 |
270 |
function mount_overlay() {
    # Mount the repo read-only via cvmfs2 (lower layer) and a writable
    # fuse-overlayfs on top, entirely on the Jenkins worker.
    log "Mounting OverlayFS/CVMFS"
    log_debug "\$JOB_NAME: ${JOB_NAME}, \$WORKSPACE: ${WORKSPACE}, \$BUILD_NUMBER: ${BUILD_NUMBER}"
    log_exec mkdir -p "$OVERLAYFS_LOWER" "$OVERLAYFS_UPPER" "$OVERLAYFS_WORK" "$OVERLAYFS_MOUNT" "$CVMFS_CACHE"
    log_exec cvmfs2 -o config=.ci/cvmfs-fuse.conf,allow_root "$REPO" "$OVERLAYFS_LOWER"
    LOCAL_CVMFS_MOUNTED=true
    verify_cvmfs_revision
    log_exec fuse-overlayfs \
        -o "lowerdir=${OVERLAYFS_LOWER},upperdir=${OVERLAYFS_UPPER},workdir=${OVERLAYFS_WORK},allow_root" \
        "$OVERLAYFS_MOUNT"
    LOCAL_OVERLAYFS_MOUNTED=true
}
283 |
284 |
function unmount_overlay() {
    # Unmount the fuse-overlayfs (if mounted) and then the CVMFS client,
    # killing any processes still holding the lower mount.
    log "Unmounting OverlayFS/CVMFS"
    if $LOCAL_OVERLAYFS_MOUNTED; then
        log_exec fusermount -u "$OVERLAYFS_MOUNT"
        LOCAL_OVERLAYFS_MOUNTED=false
    fi
    # DEBUG: what is holding this?
    log_exec fuser -v "$OVERLAYFS_LOWER" || true
    # Attempt to kill anything still accessing lower so unmount doesn't fail
    log_exec fuser -v -k "$OVERLAYFS_LOWER" || true
    log_exec fusermount -u "$OVERLAYFS_LOWER"
    LOCAL_CVMFS_MOUNTED=false
}
298 |
299 |
function start_ssh_control() {
    # Open a multiplexed SSH master connection to the Stratum 0; later
    # exec_on/scp calls reuse it. Also re-resolves USER_UID/USER_GID to the
    # remote repo owner's IDs.
    log "Starting SSH control connection to Stratum 0"
    SSH_MASTER_SOCKET="${SSH_MASTER_SOCKET_DIR}/ssh-tunnel-${REPO_USER}-${REPO_STRATUM0}.sock"
    log_exec mkdir -p "$SSH_MASTER_SOCKET_DIR"
    log_exec ssh -M -S "$SSH_MASTER_SOCKET" -Nfn -l "$REPO_USER" "$REPO_STRATUM0"
    USER_UID=$(exec_on id -u)
    USER_GID=$(exec_on id -g)
    SSH_MASTER_UP=true
}
309 |
310 |
function stop_ssh_control() {
    # Terminate the SSH master connection and remove its control socket.
    log "Stopping SSH control connection to Stratum 0"
    log_exec ssh -S "$SSH_MASTER_SOCKET" -O exit -l "$REPO_USER" "$REPO_STRATUM0"
    rm -f "$SSH_MASTER_SOCKET"
    SSH_MASTER_UP=false
}
317 |
318 |
function begin_transaction() {
    # Open a CVMFS transaction on $REPO, retrying with exponential backoff.
    # $1 >= 0 number of seconds to retry opening transaction for
    # (-1, the default, disables retry).
    local max_wait="${1:--1}"
    local start=$(date +%s)
    local elapsed='-1'
    local sleep='4'
    local max_sleep='60'
    log "Opening transaction on $REPO"
    while ! exec_on cvmfs_server transaction "$REPO"; do
        log "Failed to open CVMFS transaction on ${REPO}"
        if [ "$max_wait" -eq -1 ]; then
            log_exit_error 'Transaction open retry disabled, giving up!'
        elif [ "$elapsed" -ge "$max_wait" ]; then
            log_exit_error "Time waited (${elapsed}s) exceeds limit (${max_wait}s), giving up!"
        fi
        log "Will retry in ${sleep}s"
        sleep $sleep
        # Double the sleep up to max_sleep. Plain $(( )) assignments are used
        # instead of 'let' because 'let expr' returns nonzero when the
        # expression evaluates to 0 — e.g. elapsed computing to 0 on a fast
        # retry — which would kill the script under 'set -e'/the ERR trap.
        if [ "$sleep" -ne "$max_sleep" ]; then
            sleep=$(( sleep * 2 ))
        fi
        if [ "$sleep" -gt "$max_sleep" ]; then
            sleep="$max_sleep"
        fi
        elapsed=$(( $(date +%s) - start ))
    done
    CVMFS_TRANSACTION_UP=true
}
342 |
343 |
function abort_transaction() {
    # Forcibly abort the open CVMFS transaction, discarding staged changes.
    log "Aborting transaction on $REPO"
    exec_on cvmfs_server abort -f "$REPO"
    CVMFS_TRANSACTION_UP=false
}
349 |
350 |
function publish_transaction() {
    # Publish the open transaction, tagged with the short commit and DM
    # stage. Passed as one string so exec_on's remote shell (or local eval)
    # performs the word splitting.
    log "Publishing transaction on $REPO"
    exec_on "cvmfs_server publish -a 'idc-${GIT_COMMIT:0:7}.${DM_STAGE}' -m 'Automated data installation for commit ${GIT_COMMIT}' ${REPO}"
    CVMFS_TRANSACTION_UP=false
}
356 |
357 |
function create_workdir() {
    # Sets global $WORKDIR
    # Create a throwaway local working directory via mktemp.
    log "Creating local workdir"
    WORKDIR=$(log_exec mktemp -d -t idc.work.XXXXXX)
}
363 |
364 |
function create_remote_workdir() {
    # Sets global $REMOTE_WORKDIR
    # Create a throwaway working directory on the Stratum 0 under
    # $REMOTE_WORKDIR_PARENT.
    log "Creating remote workdir"
    REMOTE_WORKDIR=$(exec_on mktemp -d -p "$REMOTE_WORKDIR_PARENT" -t idc.work.XXXXXX)
}
370 |
371 |
function prep_docker_image() {
    # Pull the importer image; no-op unless Docker mode and pulling are both
    # enabled (pulling is disabled when debugging with a locally built image).
    $USE_DOCKER || return 0
    $IMPORT_DOCKER_IMAGE_PULL || return 0
    log "Fetching latest Galaxy image"
    exec_on docker pull "$IMPORT_DOCKER_IMAGE"
}
378 |
379 |
function run_build_galaxy() {
    # Launch the transient build Galaxy via Ansible, then wait until its
    # CVMFS client has caught up with the Stratum 0.
    setup_ansible
    log "Starting Build Galaxy"
    # This is set beforehand so that the teardown playbook will destroy the instance if launch fails partway through
    BUILD_GALAXY_UP=true
    . ./ansible-venv/bin/activate
    pushd ansible
    log_exec ansible-playbook playbook-launch.yaml
    popd
    deactivate
    wait_for_cvmfs_sync
}
392 |
393 |
function wait_for_cvmfs_sync() {
    # Poll (every 60s) until the builder VM's CVMFS client revision matches
    # the revision published by the Stratum 0.
    # TODO merge with verify_cvmfs_revision() used by build side
    # TODO: could avoid the hardcoding by using ansible but the output is harder to process
    local stratum0_published_url="http://${REPO_STRATUM0}/cvmfs/${REPO}/.cvmfspublished"
    while true; do
        # ensure it's mounted
        ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -l rocky -i ~/.ssh/id_rsa_idc_jetstream2_cvmfs idc-build ls /cvmfs/${REPO} >/dev/null
        local client_rev=$(ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -l rocky -i ~/.ssh/id_rsa_idc_jetstream2_cvmfs idc-build sudo cvmfs_talk -i ${REPO} revision)
        local stratum0_rev=$(curl -s "$stratum0_published_url" | awk -F '^--$' '{print $1} NF>1{exit}' | grep '^S' | sed 's/^S//')
        # NOTE(review): an empty/non-numeric client_rev makes '[ -eq ]' error,
        # which the 'if' treats as a mismatch, so we simply retry — confirm
        # that this is the intended behavior
        if [ "$client_rev" -eq "$stratum0_rev" ]; then
            log "${REPO} is revision ${client_rev}"
            break
        else
            log_debug "Builder client revision '${client_rev}' does not match Stratum 0 revision '${stratum0_rev}'"
            sleep 60
        fi
    done
}
412 |
413 |
function wait_for_build_galaxy() {
    # Block until the build Galaxy responds (up to 180s); on timeout, dump
    # the raw HTTP response for debugging and abort the build.
    log "Waiting for Galaxy"
    log_exec "${EPHEMERIS_BIN}/galaxy-wait" -v -g "$BUILD_GALAXY_URL" --timeout 180 || {
        log_error "Timed out waiting for Galaxy"
        #exec_on journalctl -u galaxy-gunicorn
        #log_debug "response from ${IMPORT_GALAXY_URL}";
        curl -s "$BUILD_GALAXY_URL";
        log_exit_error "Terminating build due to previous errors"
    }
}
424 |
425 |
function stop_build_galaxy() {
    # Tear down the transient build Galaxy instance via Ansible.
    . ./ansible-venv/bin/activate
    log "Stopping Build Galaxy"
    pushd ansible
    log_exec ansible-playbook playbook-teardown.yaml
    BUILD_GALAXY_UP=false
    popd
    deactivate
}
435 |
436 |
function install_data_managers() {
    # Install the Data Manager tools into the build Galaxy.
    # NOTE: currently unused — the call in main() is commented out.
    log "Generating Data Manager tool list"
    # The ephemeris venv is created but never activated, so its console
    # scripts must be invoked via "${EPHEMERIS_BIN}/" like every other
    # ephemeris command in this script; bare names would not be on $PATH.
    log_exec "${EPHEMERIS_BIN}/_idc-data-managers-to-tools"
    log "Installing Data Managers"
    log_exec "${EPHEMERIS_BIN}/shed-tools" install -t tools.yml -g "$BUILD_GALAXY_URL"
}
443 |
444 |
function generate_data_manager_tasks() {
    # returns false if there are no data managers to run
    # Split genomes/data managers into per-task run_data_managers.yaml files
    # under data_manager_tasks/ (the -g flag points at the publish server —
    # presumably for completeness checking; confirm against ephemeris docs).
    log "Generating Data Manager tasks"
    log_exec "${EPHEMERIS_BIN}/_idc-split-data-manager-genomes" -g "$PUBLISH_GALAXY_URL" --tool-id-mode short
    compgen -G "data_manager_tasks/*/data_manager_*/run_data_managers.yaml" >/dev/null
}
451 |
452 |
function run_data_managers() {
    # Run stage 0 (fetch) data managers if any are queued; otherwise run
    # stage 1 — the '&& { } || { }' chain implements this either/or.
    # TODO: eventually these will specify their stage somehow
    compgen -G "data_manager_tasks/*/data_manager_fetch_genome_dbkeys_all_fasta/run_data_managers.yaml" >/dev/null && {
        run_stage0_data_managers
    } || {
        compgen -G "data_manager_tasks/*/data_manager_*/run_data_managers.yaml" >/dev/null && {
            run_stage1_data_managers
        }
    }
}
463 |
464 |
function run_stage0_data_managers() {
    # Run all queued fetch (stage 0) data managers. Task paths have the form
    # <build_id>/<dm_repo_id>/run_data_managers.yaml.
    local dm_config a
    log "Running Stage 0 Data Managers"
    DM_STAGE=0
    pushd data_manager_tasks
    for dm_config in */data_manager_fetch_genome_dbkeys_all_fasta/run_data_managers.yaml; do
        # split the path on '/': a[0]=build_id, a[1]=data manager repo id
        readarray -td/ a <<<"$dm_config"
        run_data_manager "${a[0]}" "${a[1]}" "$dm_config"
    done
    popd
}
476 |
477 |
function run_stage1_data_managers() {
    # Run all queued non-fetch (stage 1) data managers.
    local dm_config a record
    log "Running Stage 1 Data Managers"
    DM_STAGE=1
    pushd data_manager_tasks
    for dm_config in */*/run_data_managers.yaml; do
        # split the path on '/': a[0]=build_id, a[1]=data manager repo id
        readarray -td/ a <<<"$dm_config"
        # this should never be false since we run either/or stage 0 or stage 1 in the caller
        [ "${a[1]}" != 'data_manager_fetch_genome_dbkeys_all_fasta' ] || continue
        run_data_manager "${a[0]}" "${a[1]}" "$dm_config"
    done
    popd
}
491 |
492 |
function run_data_manager() {
    # Run one data manager config against the build Galaxy in bundle mode,
    # collecting output into a predictably named history.
    # $1: build id; $2: data manager repo id; $3: run_data_managers.yaml path
    local build_id="$1"
    local dm_repo_id="$2"
    local dm_config="$3"
    log "Running Data Manager '$dm_repo_id' for build '$build_id'"
    log_exec "${EPHEMERIS_BIN}/run-data-managers" --config "$dm_config" -g "$BUILD_GALAXY_URL" --data-manager-mode bundle --history-name "idc-${build_id}-${dm_repo_id}"
}
500 |
501 |
function run_container_for_preconfigure() {
    # Sets globals $PRECONFIGURE_CONTAINER_NAME $PRECONFIGURED_IMAGE_NAME
    # Start a throwaway container (with $WORKDIR bind-mounted at /work) that
    # will be customized and committed by commit_preconfigured_container.
    PRECONFIGURE_CONTAINER_NAME="${CONTAINER_NAME}-preconfigure"
    PRECONFIGURED_IMAGE_NAME="${PRECONFIGURE_CONTAINER_NAME}d"
    ORIGINAL_IMAGE_NAME="$IMPORT_DOCKER_IMAGE"
    log "Starting import container for preconfiguration"
    exec_on docker run -d --name="$PRECONFIGURE_CONTAINER_NAME" \
        -v "${WORKDIR}/:/work/" \
        "$IMPORT_DOCKER_IMAGE" sleep infinity
    IMPORT_CONTAINER_UP=true
}
513 |
514 |
function commit_preconfigured_container() {
    # Kill the preconfigure container and commit it as a new image, which
    # then becomes the import image ('docker commit' accepts stopped
    # containers).
    log "Stopping and committing preconfigured container"
    exec_on docker kill "$PRECONFIGURE_CONTAINER_NAME"
    IMPORT_CONTAINER_UP=false
    exec_on docker commit "$PRECONFIGURE_CONTAINER_NAME" "$PRECONFIGURED_IMAGE_NAME"
    IMPORT_DOCKER_IMAGE="$PRECONFIGURED_IMAGE_NAME"
}
522 |
523 |
function clean_preconfigured_container() {
    # Best-effort removal of the preconfigure container and its committed
    # image; a no-op when preconfiguration never ran.
    [ -n "${PRECONFIGURED_IMAGE_NAME:-}" ] || return 0
    exec_on docker kill "$PRECONFIGURE_CONTAINER_NAME" || true
    exec_on docker rm -v "$PRECONFIGURE_CONTAINER_NAME" || true
    exec_on docker rmi -f "$PRECONFIGURED_IMAGE_NAME" || true
}
530 |
531 |
function generate_import_tasks() {
    # returns false if there is no data manager to import
    # Regenerate the split task list on the import side, checking against the
    # CVMFS contents so only not-yet-imported bundles produce tasks.
    log "Generating import tasks"
    copy_to genomes.yml
    copy_to data_managers.yml
    exec_on "${EPHEMERIS_BIN}/_idc-split-data-manager-genomes" --complete-check-cvmfs "--cvmfs-root=${OVERLAYFS_LOWER}" "--merged-genomes-path=${REMOTE_WORKDIR}/genomes.yml" "--data-managers-path=${REMOTE_WORKDIR}/data_managers.yml" "--split-genomes-path=${REMOTE_WORKDIR}/import_tasks"
    exec_on "compgen -G '${REMOTE_WORKDIR}/import_tasks/*/data_manager_*/run_data_managers.yaml'" >/dev/null
}
540 |
541 |
function update_tool_data_table_conf() {
    # update tool_data_table_conf.xml from repo
    # Only copy into the overlay when the file differs from what is already
    # published, to avoid content-free transactions.
    # NOTE(review): paths assume remote mode ($REMOTE_WORKDIR); in local mode
    # copy_to places files in $WORKDIR — confirm this path is exercised there.
    copy_to config/tool_data_table_conf.xml
    exec_on diff -q "${REMOTE_WORKDIR}/tool_data_table_conf.xml" "/cvmfs/${REPO}/config/tool_data_table_conf.xml" || { exec_on mkdir -p "${OVERLAYFS_MOUNT}/config" && exec_on cp "${REMOTE_WORKDIR}/tool_data_table_conf.xml" "${OVERLAYFS_MOUNT}/config/tool_data_table_conf.xml"; }
}
547 |
function run_import_container() {
    # Preconfigure the import image (python, pip, maintenance scripts),
    # commit it, then start the long-running importer container with the
    # overlay mount bound at the repo's /cvmfs path.
    run_container_for_preconfigure
    log "Installing importer scripts"
    exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" yum install -y python39 git
    exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" pip3 install --upgrade pip wheel setuptools
    exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" /usr/local/bin/pip install "$GALAXY_MAINTENANCE_SCRIPTS"
    commit_preconfigured_container

    log "Starting importer container"
    exec_on docker run -d --user "${USER_UID}:${USER_GID}" --name="${CONTAINER_NAME}" \
        -v "${OVERLAYFS_MOUNT}:/cvmfs/${REPO}" \
        "$IMPORT_DOCKER_IMAGE" sleep infinity
    IMPORT_CONTAINER_UP=true
}
562 |
563 |
function stop_import_container() {
    # Best-effort teardown of the importer container: graceful stop, then
    # kill, then remove (each tolerated to fail).
    log "Stopping importer container"
    # NOTE: docker rm -f exits 1 if the container does not exist
    exec_on docker stop "$CONTAINER_NAME" || true # try graceful shutdown first
    exec_on docker kill "$CONTAINER_NAME" || true # probably failed to start, don't prevent the rest of cleanup
    exec_on docker rm -v "$CONTAINER_NAME" || true
    IMPORT_CONTAINER_UP=false
}
572 |
573 |
function import_tool_data_bundles() {
    # For each generated import task: resolve the bundle dataset's URL on the
    # publish Galaxy, import it into the repo's data/config trees, and copy
    # the task's record under record/<build_id>/.
    local dm_config j build_id dm_repo_id bundle_uri record_file
    copy_to .ci/get-bundle-url.py
    for dm_config in $(exec_on "compgen -G '${REMOTE_WORKDIR}/import_tasks/*/data_manager_*/run_data_managers.yaml'"); do
        # task paths are <build_id>/<dm_repo_id>/run_data_managers.yaml
        IFS='/' read build_id dm_repo_id j <<< "${dm_config##${REMOTE_WORKDIR}/import_tasks/}"
        record_file="${REMOTE_WORKDIR}/import_tasks/${build_id}/${dm_repo_id}/bundle.txt"
        log "Importing bundle for Data Manager '$dm_repo_id' of '$build_id'"
        # API key is filtered from output by Jenkins
        local bundle_uri="$(exec_on ${EPHEMERIS_BIN}/python3 ${REMOTE_WORKDIR}/get-bundle-url.py --galaxy-url "$PUBLISH_GALAXY_URL" --history-name "idc-${build_id}-${dm_repo_id}" --record-file="$record_file" --galaxy-api-key="$EPHEMERIS_API_KEY")"
        [ -n "$bundle_uri" ] || log_exit_error "Could not determine bundle URI!"
        log_debug "bundle URI is: $bundle_uri"
        if $USE_DOCKER; then
            exec_on docker exec "$CONTAINER_NAME" mkdir -p "/cvmfs/${REPO}/data" "/cvmfs/${REPO}/record/${build_id}"
            exec_on docker exec "$CONTAINER_NAME" /usr/local/bin/galaxy-import-data-bundle --tool-data-path "/cvmfs/${REPO}/data" --data-table-config-path "/cvmfs/${REPO}/config/tool_data_table_conf.xml" "$bundle_uri"
            # NOTE(review): relative 'import_tasks/...' here vs the absolute
            # ${REMOTE_WORKDIR} path in the else branch — confirm the working
            # directory in docker mode makes this resolve correctly
            exec_on rsync -av "import_tasks/${build_id}/${dm_repo_id}" "${OVERLAYFS_MOUNT}/record/${build_id}"
        else
            exec_on mkdir -p "/cvmfs/${REPO}/data" "/cvmfs/${REPO}/record/${build_id}"
            exec_on "TMPDIR=${REMOTE_WORKDIR}" "${GALAXY_MAINTENANCE_SCRIPTS_BIN}/galaxy-import-data-bundle" --tool-data-path "/cvmfs/${REPO}/data" --data-table-config-path "/cvmfs/${REPO}/config/tool_data_table_conf.xml" "$bundle_uri"
            exec_on rsync -av "${REMOTE_WORKDIR}/import_tasks/${build_id}/${dm_repo_id}" "${OVERLAYFS_MOUNT}/record/${build_id}"
        fi
    done
}
596 |
597 |
function show_logs() {
    # Dump the import container's logs; optional $1 limits output to the
    # last N lines.
    local lines=
    if [ -n "${1:-}" ]; then
        lines="--tail ${1:-}"
        log_debug "tail ${lines} of server log";
    else
        log_debug "contents of server log";
    fi
    # $lines is deliberately unquoted so '--tail N' splits into two arguments
    exec_on docker logs $lines "$CONTAINER_NAME"
}
608 |
609 |
function show_paths() {
    # Display the tree of files staged in the overlay upper dir.
    local upper="$OVERLAYFS_UPPER"
    log "contents of OverlayFS upper mount (will be published)"
    exec_on tree "$upper"
}
614 |
615 |
function check_for_repo_changes() {
    # Verify that the import actually changed something under config/ in the
    # overlay upper dir, printing unified diffs as it goes; terminate the
    # build if nothing changed.
    local lower=
    local changes=false
    log "Checking for changes to repo"
    show_paths
    for config in $(exec_on "compgen -G '${OVERLAYFS_UPPER}/config/*'"); do
        exec_on test -f "$config" || continue
        # compare against the published copy, or /dev/null for new files
        lower="${OVERLAYFS_LOWER}/config/${config##*/}"
        exec_on test -f "$lower" || lower=/dev/null
        exec_on diff -q "$lower" "$config" || { changes=true; exec_on diff -u "$lower" "$config" || true; }
    done
    if ! $changes; then
        log_exit_error "Terminating build: expected changes to ${OVERLAYFS_UPPER}/config/* not found!"
    fi
}
631 |
632 |
function clean_workspace() {
    # Remove this build's scratch directory under the Jenkins workspace.
    local build_dir="${WORKSPACE}/${BUILD_NUMBER}"
    log_exec rm -rf "$build_dir"
}
636 |
637 |
function post_install() {
    # Mirror owner read (and read+execute) permissions to group/other on the
    # upper dir so published files are world-readable; symlinks are skipped.
    log "Running post-installation tasks"
    exec_on "find '$OVERLAYFS_UPPER' -perm -u+r -not -perm -o+r -not -type l -print0 | xargs -0 --no-run-if-empty chmod go+r"
    exec_on "find '$OVERLAYFS_UPPER' -perm -u+rx -not -perm -o+rx -not -type l -print0 | xargs -0 --no-run-if-empty chmod go+rx"
}
643 |
644 |
function copy_upper_to_stratum0() {
    # Rsync the locally built overlay upper dir into the open transaction on
    # the Stratum 0, reusing the SSH control connection; preserves rsync's
    # exit status.
    log "Copying changes to Stratum 0"
    set -x
    rsync -ah -e "ssh -o ControlPath=${SSH_MASTER_SOCKET}" --stats "${OVERLAYFS_UPPER}/" "${REPO_USER}@${REPO_STRATUM0}:/cvmfs/${REPO}"
    { rc=$?; set +x; } 2>/dev/null
    return $rc
}
652 |
653 |
function do_import_local() {
    # Import bundles into a locally mounted overlay on the Jenkins worker,
    # then (when publishing) rsync the result to the Stratum 0 and publish.
    mount_overlay
    # TODO: we could probably replace the import container with whatever cvmfsexec does to fake a mount
    if generate_import_tasks; then
        create_workdir
        # NOTE(review): prep_for_galaxy_run is not defined anywhere in this
        # file — confirm it is provided elsewhere, or this mode is dead code
        prep_for_galaxy_run
        update_tool_data_table_conf
        run_import_container
        import_tool_data_bundles
        check_for_repo_changes
        stop_import_container
        clean_preconfigured_container
        post_install
    else
        log "Nothing to import"
        PUBLISH=false
    fi
    if $PUBLISH; then
        start_ssh_control
        begin_transaction 600
        copy_upper_to_stratum0
        publish_transaction
        stop_ssh_control
    fi
    unmount_overlay
}
680 |
681 |
function do_import_remote() {
    # Perform the import directly on the Stratum 0 over SSH, inside a CVMFS
    # transaction, and publish or abort at the end.
    start_ssh_control
    create_remote_workdir
    setup_remote_ephemeris
    # from this point forward $EPHEMERIS_BIN refers to remote
    if generate_import_tasks; then
        setup_galaxy_maintenance_scripts "$WORKDIR" "$REMOTE_PYTHON"
        begin_transaction
        update_tool_data_table_conf
        import_tool_data_bundles
        check_for_repo_changes
        post_install
    else
        log "Nothing to import"
        PUBLISH=false
    fi
    # BUGFIX: the previous '$PUBLISH && publish_transaction || abort_transaction'
    # ran abort even when nothing was imported and begin_transaction never
    # opened a transaction, making 'cvmfs_server abort' fail and kill the
    # script via set -e/the ERR trap. Only abort when a transaction is open;
    # a failed publish is still aborted by the exit trap.
    if $PUBLISH; then
        publish_transaction
    elif $CVMFS_TRANSACTION_UP; then
        abort_transaction
    fi
    stop_ssh_control
}
701 |
702 |
function main() {
    # Top-level flow: gate on the bot command, build any missing data on a
    # transient Galaxy, then import unimported bundles into CVMFS.
    check_bot_command
    load_repo_configs
    detect_changes
    set_repo_vars
    setup_ephemeris
    if generate_data_manager_tasks; then
        run_build_galaxy
        wait_for_build_galaxy
        #install_data_managers
        run_data_managers
    else
        log "Nothing to build, will check for unimported data"
    fi
    if $USE_LOCAL_OVERLAYFS; then
        do_import_local
    else
        do_import_remote
    fi
    # NOTE(review): stop_build_galaxy runs even when no build Galaxy was
    # launched (nothing-to-build path) — presumably the teardown playbook
    # tolerates an absent instance; confirm
    stop_build_galaxy
    clean_workspace
    return 0
}


main
729 |
--------------------------------------------------------------------------------
/.ci/repos.conf:
--------------------------------------------------------------------------------
# source me
#
# Repository maps consumed by jenkins.sh (load_repo_configs/set_repo_vars).

# Map toolset dirs to repo names
declare -g -A REPOS=(
    [sandbox]=sandbox.galaxyproject.org
    [idc]=idc.galaxyproject.org
)

# Map repo names to stratum 0s
declare -g -A REPO_STRATUM0S=(
    [sandbox.galaxyproject.org]=cvmfs0-psu0.galaxyproject.org
    [idc.galaxyproject.org]=cvmfs0-psu0.galaxyproject.org
)

# Map repo names to owner on stratum 0s
declare -g -A REPO_USERS=(
    [sandbox.galaxyproject.org]=sandbox
    [idc.galaxyproject.org]=idc
)

# Print for debugging
declare -p REPOS REPO_STRATUM0S REPO_USERS
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # Simon's working directory
104 | working/
105 |
106 | # Ansible installed artifacts
107 | ansible/collections
108 | ansible/roles/*.*
109 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Björn Grüning
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # IDC - Simon's Data Club
2 |
3 | In memory of our friend and reference data champion, [Simon Gladman](https://www.biocommons.org.au/news/simon-gladman).
4 |
5 | Formerly the Intergalactic (reference) Data Commission
6 |
7 | The IDC is for Galaxy reference data what the [IUC](https://github.com/galaxyproject/tools-iuc) is for Galaxy tools: A project by the Galaxy Team and Community to produce, host, and distribute reference data for use in Galaxy servers. Community contributions and Pull Request reviews are encouraged! Details on how to contribute can be found below.
8 |
9 | ### Summary
10 |
11 | This repository is the entry point to contribute to the community maintained CVMFS data repository hosting approximately 6TB of public and open reference datasets.
12 |
13 | Ultimately, it is envisioned that the set of files contained here would be modified with the addition of either a new genomic data set specification or a new data manager. Subsequent Pull Request acceptance would then fetch the genomic data, build the appropriate indices and upload everything to the proper position within the Galaxy project's CVMFS repositories.
14 |
15 | Comments/discussion on the approach and contributions are very welcome!
16 |
17 | Currently, the repository is geared to produce genomic indices for various tools using their data managers. The included `run_builder.sh` script will:
18 |
19 | 1. Create a virtualenv with the required software
20 | 2. Create a docker Galaxy instance
21 | 3. Install the data manager tools listed in `data_managers_tools.yml`
22 | 4. Dynamically create an Ephemeris .yml config file from a list of genomes and their sources
23 | 5. Fetch the genomes from the appropriate sources and install them into Galaxy's `all_fasta` data table
24 | 6. Restart Galaxy to reload the `all_fasta` data table
25 | 7. Create the tool indices using Ephemeris and the `data_managers_genomes.yml` file
26 |
27 | The resulting genome files and tool indices will be located in the directory specified in the `run_builder.sh` script in the environment variables set at the top.
28 |
29 | The two important files are:
30 |
31 | * `data_managers.yml`
32 | * `genomes.yml`
33 |
34 | ### data_managers.yml
35 |
36 | This file contains the list of data managers that are to be installed into the target
37 | Galaxy building IDC data.
38 |
39 | ```yaml
40 | NAME_OF_THE_DATA_MANAGER:
41 | tool_id: TOOL_ID_IN_TARGET_REPO_OF_DATA_MANAGER
42 | tags:
43 | - tag #Tag can be either "genome" or "fetch_source".
44 | ```
45 |
46 | Other data managers are added as elements in the `tools` yml array. The first tool listed should always be the `fetch_source` data manager. In most cases this will be the `data_manager_fetch_genome_dbkeys_all_fasta` data manager that sources and downloads most genomes and populates the `all_fasta` and `__dbkeys__` data tables for later use by other data managers.
47 |
48 | Ephemeris can be used to generate a shed-tool install file to bootstrap the required tools
49 | and repositories into a target Galaxy for IDC installs.
50 |
51 | ```bash
52 | pip install ephemeris
53 | _idc-data-managers-to-tools
54 | # defaults to:
55 | # _idc-data-managers-to-tools --data-managers-conf=data_managers.yml --shed-install-output-conf=tools.yml
56 | shed-tools install -t tools.yml
57 | ```
58 |
59 | ### genomes.yml
60 |
61 | This is the file that contains the list of the genomes to be fetched and indexed.
62 |
63 | There is a lot more information in this file than Galaxy can currently use, but its format has been specified with the future in mind.
64 |
65 | At this stage this file only needs to contain the `dbkey`, `description`, `id` and `source` fields. The rest are there as discussion points currently on the kind of information we would like to have stored with Galaxy to ensure provenance of the reference data used in analyses.
66 |
67 | Format:
68 |
69 | ```yaml
70 | genomes:
71 | - dbkey: #The dbkey of the data
72 | description: #The description of the data, including its taxonomy, version and date
73 | id: #The unique id of the data in Galaxy
74 | source: #The source of the data. Can be: 'ucsc', an NCBI accession number or a URL to a fasta file.
75 | doi: #Any DOI associated with the data
76 | version: #Any version information associated with the data
77 | checksum: #A SHA256 checksum of the original
78 | blob: #A blob for any other pertinent information
79 | indexers: #A list of tags for the types of data managers to be run on this data
80 | skiplist: # A list of data managers with the above specified tag NOT to be run on this data
81 |
82 | ```
83 |
84 | Example:
85 |
86 | ```yaml
87 | genomes:
88 | - dbkey: dm6
89 | description: D. melanogaster Aug. 2014 (BDGP Release 6 + ISO1 MT/dm6) (dm6)
90 | id: dm6
91 | source: ucsc
92 | doi:
93 | version:
94 | checksum:
95 | blob:
96 | indexers:
97 | - genome
98 | skiplist:
99 | - bfast
100 | - dbkey: Ecoli-O157-H7-Sakai
101 | description: "Escherichia coli O157-H7 Sakai"
102 | id: Ecoli-O157-H7-Sakai
103 | source: https://swift.rc.nectar.org.au:8888/v1/AUTH_377/public/COMP90014/Assignment1/Ecoli-O157_H7-Sakai-chr.fna
104 | doi:
105 | version:
106 | checksum:
107 | blob:
108 | indexers:
109 | - genome
110 | skiplist:
111 | - bfast
112 | - dbkey: Salm-enterica-Newport
113 | description: "Salmonella enterica subsp. enterica serovar Newport str. USMARC-S3124.1"
114 | id: Salm-enterica-Newport
115 | source: NC_021902
116 | doi:
117 | version:
118 | checksum:
119 | blob:
120 | indexers:
121 | - genome
122 | skiplist:
123 | - bfast
124 | ```
125 |
126 | ## Testing
127 |
128 | This repo can be tested using a machine with Docker installed and by a user with Docker privileges. As a warning however, some of the genomes will take a LOT (>64GB) of RAM to index.
129 |
130 | It should work just by cloning the repo to the machine, modifying the environment variables in the `run_builder.sh` script to suit and then running it.
131 |
132 | ## Other data types
133 |
134 | Work has been done on some of the other data types, tools and data managers such as those that work on multiple genomes at once like Busco, Metaphlan etc. These can be found in the `older_attempts` directory along with appropriate README.
135 | ## How to use the reference data
136 |
137 | If you want to use the reference data, please have a look at our [ansible-role](https://github.com/galaxyproject/ansible-cvmfs
138 | ) and the [example playbook](https://github.com/usegalaxy-eu/cvmfs-example).
139 |
140 |
--------------------------------------------------------------------------------
/ansible/.pass:
--------------------------------------------------------------------------------
#!/bin/sh
# Ansible vault password helper: prints the vault password on stdout.
# Uses $IDC_VAULT_PASS when set; otherwise prompts on the terminal with
# echo disabled so the password is not displayed while typed.

if [ -z "$IDC_VAULT_PASS" ]; then
    echo 'WARNING: $IDC_VAULT_PASS is unset, prompting for password' >&2
    # printf instead of `echo -n`: -n is not portable in POSIX sh
    printf 'Vault password: ' >&2
    # Restore terminal echo even if the user interrupts the prompt
    trap 'stty echo' INT TERM
    stty -echo
    # -r prevents backslashes in the password from being mangled
    read -r IDC_VAULT_PASS
    stty echo
    trap - INT TERM
    echo '' >&2
fi

# printf '%s\n' rather than echo: some sh echos interpret backslash escapes
printf '%s\n' "$IDC_VAULT_PASS"
--------------------------------------------------------------------------------
/ansible/ansible.cfg:
--------------------------------------------------------------------------------
1 | [defaults]
2 |
3 | # default inventory file
4 | inventory = inventory.yaml
5 |
6 | # use pass for vault
7 | vault_password_file = ./.pass
8 |
9 | # include things from galaxyproject/ansible-common-roles
10 | roles_path = roles
11 | collections_paths = collections
12 |
13 | # use openssh so that we can persist connections
14 | transport = ssh
15 |
16 | # make error messages readable
17 | stdout_callback = yaml
18 |
19 | [ssh_connection]
20 |
21 | # enable pipelining with OpenSSH
22 | pipelining = True
23 |
24 | # These are necessary for cloud instances
25 | #pipelining = False
26 | #ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no
27 |
28 | # This verbosity is not needed
29 | #[diff]
30 | #always = True
31 |
--------------------------------------------------------------------------------
/ansible/files/idc/bootstrap.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Bootstrap the IDC Galaxy admin user: wait for Galaxy to create the
# galaxy_user table, then insert the 'idc' user if it does not exist yet.
# Assumes psql can connect to $database without a password prompt.

email='idc@galaxyproject.org'
username='idc'
password='PBKDF2$sha256$100000$XhmbiqICQVhoO+7z$kdb1UThcjcvljNvpdCCUVYU9EZwG2sQG'
database='idc'
sleep_time=5
sleep_count=30

sql="
INSERT INTO galaxy_user
(create_time, update_time, email, username, password, last_password_change, external, deleted, purged, active)
VALUES
(NOW(), NOW(), '$email', '$username', '$password', NOW(), false, false, false, true)
"

count=0
# Quote the command substitution: an unquoted empty expansion (e.g. if psql
# fails) would make the test a syntax error. Loop until the table truly
# exists ('t') so transient psql failures cause a retry, not a false pass.
while [ "$(psql -At -c "SELECT EXISTS (SELECT relname FROM pg_class WHERE relname = 'galaxy_user')" "$database")" != 't' ]; do
    echo "waiting for galaxy_user table..."
    count=$((count + 1))
    [ "$count" -lt "$sleep_count" ] || { echo "timed out"; exit 1; }
    sleep "$sleep_time"
done

# Insert only if the user is not already present (idempotent re-runs)
if [ "$(psql -At -c "SELECT count(*) FROM galaxy_user WHERE username = '$username'" "$database")" -eq 0 ]; then
    psql -c "$sql" "$database"
fi
--------------------------------------------------------------------------------
/ansible/files/idc/data_manager_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
18 |
19 |
31 |
32 |
33 |
34 |
35 |
48 |
49 |
50 |
60 |
61 |
62 |
63 |
64 |
77 |
78 |
79 |
80 |
81 |
92 |
93 |
94 |
100 |
101 |
102 |
109 |
110 |
111 |
112 |
113 |
126 |
127 |
128 |
129 |
130 |
143 |
144 |
145 |
146 |
147 |
160 |
161 |
162 |
163 |
164 |
182 |
183 |
184 |
185 |
186 |
199 |
200 |
201 |
202 |
203 |
216 |
217 |
218 |
219 |
220 |
226 |
227 |
228 |
229 |
230 |
243 |
244 |
245 |
252 |
253 |
254 |
255 |
256 |
269 |
270 |
271 |
272 |
273 |
285 |
286 |
287 |
299 |
300 |
301 |
302 |
303 |
316 |
317 |
318 |
319 |
320 |
332 |
333 |
334 |
335 |
336 |
341 |
342 |
343 |
344 |
--------------------------------------------------------------------------------
/ansible/files/idc/workflow_schedulers_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ansible/group_vars/idc_builders.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | tailscale_authkey: !vault |
4 | $ANSIBLE_VAULT;1.1;AES256
5 | 35346435633731353836346266636239366333343532373365393332613035353436373632363235
6 | 3035623864306265396366636637306531383162346637620a613064383461306134373035306663
7 | 35663562373035653262343066623332366166643866313562373136393339306165303437363833
8 | 3664366566353137360a613666333664343837336132333430343636333738663464306161346639
9 | 31636366643837386431636265386662663362623232643766333436393862633838616531306630
10 | 33396438633639636634306362353462376233633265393233393962343934343230616632333134
11 | 313765363637303239646162306161663662
12 |
13 | minio_access_key: idc
14 | minio_secret_key: !vault |
15 | $ANSIBLE_VAULT;1.1;AES256
16 | 63343861656633656332623062386662613835383031396531383961623866333734383934323361
17 | 3232616334626430333463346330363432366137613337390a356264386363303266353964376566
18 | 32373933663436646566633237313135626265393936656332373633383236616534643864366532
19 | 6130616466666665330a393833373965623365333239303039383061363334663031646632316333
20 | 30396263346464306233646466323162653437663134363261653239366630643935383964666533
21 | 3032646166333162663934623635336630323035336639313163
22 |
23 | galaxy_database_password: !vault |
24 | $ANSIBLE_VAULT;1.1;AES256
25 | 61353439363266313231353362333636633830353237653338643765313963373434303365653734
26 | 6466336130346536303136386165326665663965313365630a313564326131356237333232623637
27 | 31656534323962383662663734363430373931646166646663313430366630373561346532353536
28 | 3763316333343132640a363639663636396665313831323430363038633630613433373230663936
29 | 61333338623663323432313765613332643530643862383230363033623237313263623162613164
30 | 3532653230333665313032633133613836653031323965343234
31 |
32 | os_key_name: idc
33 |
34 | os_image: usegalaxy-node
35 | os_flavor: m3.small
36 | os_security_groups: [default, ssh-only]
37 |
38 | host_groups:
39 | - name: idc
40 | gid: 808
41 |
42 | host_users:
43 | - name: idc
44 | home: /home/idc
45 | uid: 808
46 | group: idc
47 | shell: /bin/sh
48 |
49 | host_directories:
50 | - path: /jetstream2/scratch/idc
51 | owner: idc
52 | group: idc
53 | mode: "0755"
54 |
55 | nginx_flavor: core
56 | nginx_enable_default_server: false
57 | nginx_servers:
58 | - idc-build
59 |
60 | galaxy_layout: root-dir
61 | galaxy_root: /srv/galaxy
62 | galaxy_user:
63 | name: idc
64 | galaxy_server_dir: /cvmfs/main.galaxyproject.org/galaxy
65 | galaxy_venv_dir: /cvmfs/main.galaxyproject.org/venv
66 | galaxy_config_dir: "{{ galaxy_root }}/config"
67 | galaxy_shed_tools_dir: /jetstream2/scratch/idc/shed_tools
68 | galaxy_manage_clone: false
69 | galaxy_manage_download: false
70 | galaxy_manage_existing: false
71 | galaxy_manage_paths: true
72 | galaxy_manage_static_setup: true
73 | galaxy_manage_mutable_setup: true
74 | galaxy_manage_database: false
75 | galaxy_fetch_dependencies: false
76 | galaxy_build_client: false
77 | galaxy_backup_configfiles: false
78 | galaxy_manage_gravity: true
79 |
80 | galaxy_privsep_dirs:
81 | - "{{ galaxy_config_dir }}"
82 | - "{{ galaxy_config_dir }}/tpv"
83 | galaxy_config_files:
84 | - src: files/idc/workflow_schedulers_conf.xml
85 | dest: "{{ galaxy_config_dir }}/workflow_schedulers_conf.xml"
86 | - src: files/idc/data_manager_conf.xml
87 | dest: "{{ galaxy_config_dir }}/data_manager_conf.xml"
88 | galaxy_config_templates:
89 | - src: templates/idc/tpv/idc.yaml.j2
90 | dest: "{{ galaxy_config_dir }}/tpv/idc.yaml"
91 | - src: templates/idc/object_store_conf.xml.j2
92 | dest: "{{ galaxy_config_dir }}/object_store_conf.xml"
93 | galaxy_config:
94 | gravity:
95 | process_manager: systemd
96 | galaxy_user: idc
97 | galaxy_group: idc
98 | galaxy_root: "{{ galaxy_server_dir }}"
99 | virtualenv: "{{ galaxy_venv_dir }}"
100 | gunicorn:
101 | bind: localhost:8080
102 | workers: 1
103 | #handlers:
104 | # handler:
105 | # processes: 1
106 | # pools:
107 | # - job-handlers
108 | # - workflow-schedulers
109 | galaxy:
110 | server_name: idc-build
111 | database_connection: "postgresql://idc:{{ galaxy_database_password }}@galaxy-db-02/galaxy_main"
112 | install_database_connection: "sqlite:///{{ galaxy_mutable_data_dir }}/install.sqlite?isolation_level=IMMEDIATE"
113 | tool_data_table_config_path: /cvmfs/idc.galaxyproject.org/config/tool_data_table_conf.xml
114 | data_manager_config_file: "{{ galaxy_config_dir }}/data_manager_conf.xml"
115 | #shed_data_manager_config_file: /cvmfs/main.galaxyproject.org/config/shed_data_manager_conf.xml
116 | conda_auto_init: false
117 | conda_auto_install: false
118 | allow_user_creation: false
119 | #require_login: true
120 | admin_users:
121 | - idc@galaxyproject.org
122 | - nate+test@bx.psu.edu
123 | file_path: /jetstream2/scratch/idc/objects
124 | job_working_directory: /jetstream2/scratch/idc/jobs
125 | object_store_config_file: "{{ galaxy_config_dir }}/object_store_conf.xml"
126 | object_store_store_by: uuid
127 | workflow_schedulers_config_file: "{{ galaxy_config_dir }}/workflow_schedulers_conf.xml"
128 | container_resolvers:
129 | - type: cached_mulled_singularity
130 | cache_directory: /cvmfs/singularity.galaxyproject.org/all
131 | cache_directory_cacher_type: dir_mtime
132 | #- type: mulled_singularity
133 | # cache_directory: /jetstream2/scratch/idc/singularity
134 | - type: explicit_singularity
135 | cleanup_job: never
136 | job_config:
137 | runners:
138 | local:
139 | load: galaxy.jobs.runners.local:LocalJobRunner
140 | workers: 1
141 | slurm:
142 | load: galaxy.jobs.runners.slurm:SlurmJobRunner
143 | workers: 2
144 | drmaa_library_path: /usr/lib64/libdrmaa.so.1
145 | handling:
146 | assign:
147 | - db-self
148 | execution:
149 | default: tpv_dispatcher
150 | environments:
151 | tpv_dispatcher:
152 | runner: dynamic
153 | type: python
154 | function: map_tool_to_destination
155 | rules_module: tpv.rules
156 | tpv_config_files:
157 | - "{{ galaxy_config_dir }}/tpv/idc.yaml"
158 |
159 | # need to exclude shed_data_managers_conf.xml or else the role tries to create/chown its parent
160 | #galaxy_mutable_config_files:
161 | # - src: "shed_tool_data_table_conf.xml"
162 | # dest: "{{ galaxy_config_merged[galaxy_app_config_section].shed_tool_data_table_config }}"
163 |
--------------------------------------------------------------------------------
/ansible/group_vars/js2.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | # these should work but don't appear to
4 | #export OS_APPLICATION_CREDENTIAL_ID=
5 | #export OS_APPLICATION_CREDENTIAL_SECRET=
6 |
7 | os_application_credential_id: !vault |
8 | $ANSIBLE_VAULT;1.1;AES256
9 | 39623061366238613763346532303434393965386566646362303334343863333663396562333338
10 | 3934386632396262613733306561333462613735356237620a663937386264366132643033376633
11 | 33626638623132646230653962343832656362393335303562383265623233636234653332386436
12 | 6437313033353063310a633532393132643830303238396232313137306234316162326336326365
13 | 34346239663462396363636535313739623539393564346263363266343333653637303562393339
14 | 6263313934356130616163313563303362353937663234363634
15 |
16 | os_application_credential_secret: !vault |
17 | $ANSIBLE_VAULT;1.1;AES256
18 | 32373838393538356139376130666461346265346130373166613432343134343565643262653362
19 | 3062643765616631613365306363303635333136333433310a633434616262666632623265313564
20 | 32653762303366353861666465656630303838613930313136376565333162643165653937393963
21 | 6562383562383935640a356164303962613331356366353462356539616261663261306339346139
22 | 61356632666431643132626563303466373938623262633361646337303666366531386161636536
23 | 62336338636265326138356463393534393765323933623631313431373462326461383764316331
24 | 33353636306338636531303837636561353031663966643961363739306131303034333836343434
25 | 64333364366365343033313166663236393761653436396234316563663264656637643233333334
26 | 3834
27 |
28 | cloud_id: js2
29 | clouds_yaml:
30 | clouds:
31 | js2:
32 | auth:
33 | application_credential_id: "{{ os_application_credential_id }}"
34 | application_credential_secret: "{{ os_application_credential_secret }}"
35 | auth_url: 'https://js2.jetstream-cloud.org:5000/v3/'
36 | region_name: "IU"
37 | interface: "public"
38 | identity_api_version: 3
39 | auth_type: "v3applicationcredential"
40 |
41 | os_cloud_id: "{{ cloud_id }}"
42 | os_clouds_yaml: "{{ clouds_yaml }}"
43 |
44 | os_name: "{{ inventory_hostname_short }}"
45 |
46 | os_nics:
47 | - net-name: "usegalaxy"
48 |
--------------------------------------------------------------------------------
/ansible/inventory.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # idc hosts
3 |
4 | all:
5 | hosts:
6 | idc-build.galaxyproject.org:
7 | ansible_user: rocky
8 | ansible_become: true
9 | ansible_ssh_private_key_file: ~/.ssh/id_rsa_idc_jetstream2_cvmfs
10 | ansible_ssh_common_args: -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no
11 | children:
12 | idc_builders:
13 | hosts:
14 | idc-build.galaxyproject.org:
15 | js2:
16 | hosts:
17 | idc-build.galaxyproject.org:
18 |
--------------------------------------------------------------------------------
/ansible/playbook-launch.yaml:
--------------------------------------------------------------------------------
1 | - name: Spawn instance
2 | hosts: idc_builders
3 | gather_facts: no
4 | environment:
5 | OS_CLOUD: "{{ os_cloud_id }}"
6 | OS_IDENTITY_API_VERSION: '3'
7 | roles:
8 | - openstack
9 |
10 | - name: Configure Galaxy
11 | hosts: idc_builders
12 | tasks:
13 | - name: Install slurm-drmaa
14 | dnf:
15 | name: https://github.com/natefoo/slurm-drmaa/releases/download/1.1.4/slurm-drmaa-1.1.4-22.05.el9.x86_64.rpm
16 | disable_gpg_check: true
17 | - import_role:
18 | name: galaxyproject.general.virtual
19 | - import_role:
20 | name: galaxyproject.general.packages
21 | - import_role:
22 | name: galaxyproject.general.users
23 | - import_role:
24 | name: galaxyproject.general.paths
25 | - import_role:
26 | name: galaxyproject.galaxy
27 | - name: Flush handlers
28 | meta: flush_handlers
29 | - import_role:
30 | name: galaxyproject.nginx
31 | - import_role:
32 | name: artis3n.tailscale
33 |
--------------------------------------------------------------------------------
/ansible/playbook-teardown.yaml:
--------------------------------------------------------------------------------
1 | - name: Clean up instance
2 | hosts: idc_builders
3 | tasks:
4 | - name: Remove scratch directory
5 | file:
6 | path: /jetstream2/scratch/idc
7 | state: absent
8 | diff: false
9 | - name: Create logs directory
10 | file:
11 | path: /jetstream2/scratch/idc-build-logs
12 | owner: rocky
13 | group: rocky
14 | mode: "0755"
15 | state: directory
16 | - name: Dump gunicorn logs
17 | shell: journalctl -u galaxy-gunicorn.service > /jetstream2/scratch/idc-build-logs/gunicorn-{{ lookup('ansible.builtin.env', 'BUILD_NUMBER') | default('unknown') }}.log
18 | become_user: rocky
19 | - name: Log out from tailnet
20 | command: tailscale logout
21 |
22 | - name: Destroy instance
23 | hosts: idc_builders
24 | gather_facts: no
25 | environment:
26 | OS_CLOUD: "{{ os_cloud_id }}"
27 | OS_IDENTITY_API_VERSION: '3'
28 | tasks:
29 | - import_tasks: roles/openstack/tasks/secrets.yml
30 | - name: Destroy instance
31 | os_server:
32 | name: "{{ os_name | default(inventory_hostname) }}"
33 | state: absent
34 | delegate_to: localhost
35 | - import_tasks: roles/openstack/tasks/clean.yml
36 |
--------------------------------------------------------------------------------
/ansible/requirements.txt:
--------------------------------------------------------------------------------
1 | # the python requirements for running this playbook
2 | ansible<7
3 | dnspython
4 | openstacksdk<0.70
5 |
--------------------------------------------------------------------------------
/ansible/requirements.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | roles:
4 | - name: artis3n.tailscale
5 |   version: v4.2.2
6 | - name: galaxyproject.galaxy
7 | version: 0.10.15
8 | - name: galaxyproject.nginx
9 | version: 0.7.1
10 | - name: galaxyproject.postgresql
11 | version: 1.1.2
12 | - name: galaxyproject.postgresql_objects
13 | version: 1.2.0
14 |
15 | collections:
16 | - name: galaxyproject.general
17 | version: 1.0.0
18 |
--------------------------------------------------------------------------------
/ansible/roles/openstack/tasks/clean.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: Remove clouds.yaml
4 | file:
5 | path: "{{ inventory_dir }}/clouds.yaml"
6 | state: absent
7 | delegate_to: localhost
8 | run_once: true
9 | become: false
10 |
11 | - name: Remove additional secrets
12 | file:
13 | path: "{{ inventory_dir }}/{{ item.dest }}"
14 | state: absent
15 | loop: "{{ os_secrets | default([]) }}"
16 | delegate_to: localhost
17 | run_once: yes
18 | no_log: yes
19 |
--------------------------------------------------------------------------------
/ansible/roles/openstack/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - import_tasks: secrets.yml
4 |
5 | - include_tasks: spawn.yml
6 | when:
7 | - os_image is defined
8 | - os_flavor is defined
9 | - os_key_name is defined
10 | - os_nics is defined
11 | - os_security_groups is defined
12 |
--------------------------------------------------------------------------------
/ansible/roles/openstack/tasks/secrets.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: Write clouds.yaml
4 | copy:
5 | content: "{{ os_clouds_yaml | to_nice_yaml }}"
6 | dest: "{{ playbook_dir }}/clouds.yaml"
7 | mode: "0400"
8 | delegate_to: localhost
9 | run_once: true
10 | no_log: yes
11 |
12 | - name: Write additional secrets
13 | copy:
14 | content: "{{ item.content }}"
15 | dest: "{{ inventory_dir }}/{{ item.dest }}"
16 | mode: "0400"
17 | loop: "{{ os_secrets | default([]) }}"
18 | delegate_to: localhost
19 | run_once: yes
20 | loop_control:
21 | label: "{{ item.dest }}"
22 |
--------------------------------------------------------------------------------
/ansible/roles/openstack/tasks/spawn.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: Instance spawn block
4 | block:
5 |
6 | - name: Create volumes
7 | os_volume:
8 | cloud: "{{ os_cloud_id }}"
9 | display_name: "{{ item.name }}"
10 | size: "{{ item.size }}"
11 | delegate_to: localhost
12 | loop: "{{ os_volumes | default([]) }}"
13 |
14 | - name: Spawn new instance
15 | os_server:
16 | cloud: "{{ os_cloud_id }}"
17 | name: "{{ os_name | default(inventory_hostname) }}"
18 | image: "{{ os_image }}"
19 | flavor: "{{ os_flavor }}"
20 | key_name: "{{ os_key_name }}"
21 | nics: "{{ os_nics }}"
22 | security_groups: "{{ os_security_groups }}"
23 | auto_ip: "{{ os_auto_ip | default(omit) }}"
24 | floating_ips: "{{ os_floating_ips | default(omit) }}"
25 | meta: "group={{ group_names[0] }}"
26 | userdata: |
27 | #cloud-config
28 | package_upgrade: false
29 | delegate_to: localhost
30 | register: __spawn_result
31 |
32 | - name: Attach volumes to instances
33 | os_server_volume:
34 | cloud: "{{ os_cloud_id }}"
35 | server: "{{ os_name | default(inventory_hostname) }}"
36 | volume: "{{ item.name }}"
37 | delegate_to: localhost
38 | loop: "{{ os_volumes | default([]) }}"
39 | register: __attach_result
40 |
41 | - name: Set volume device(s) fact
42 | set_fact:
43 | __os_volume_devices: "{{ __attach_result.results | selectattr('attachments', 'defined') | map(attribute='attachments') | flatten | map(attribute='device') | list }}"
44 |
45 | - name: Set filesystems fact
46 | set_fact:
47 | filesystems: "{{ (filesystems | default([])) + [{'dev': item[1], 'fstype': item[0]}] }}"
48 | loop: "{{ (os_volumes | map(attribute='fstype')) | zip(__os_volume_devices) | list }}"
49 | when: os_volumes is defined
50 |
51 | - name: Update inventory with spawned instance IP
52 | set_fact:
53 | ansible_host: "{{ __spawn_result.server.public_v4 or __spawn_result.server.private_v4 }}"
54 | #delegate_to: localhost
55 |
56 | - name: Log IP addresses
57 | debug:
58 | var: ansible_host
59 |
60 | - name: Wait for instance to become accessible
61 | wait_for_connection:
62 | timeout: 120
63 |
64 | - name: Set authorized keys
65 | authorized_key:
66 | user: "{{ ansible_user }}"
67 | key: "{% for authorized in os_admin_users %}{{ ssh_public_keys[authorized] ~ '\n' }}{% endfor %}"
68 | exclusive: "{{ os_admin_users_exclusive | default('yes') }}"
69 | when: os_admin_users is defined
70 |
71 | always:
72 |
73 | - import_tasks: clean.yml
74 |
75 | rescue:
76 |
77 | - fail:
78 | msg: Exiting due to previous failure
79 |
--------------------------------------------------------------------------------
/ansible/templates/idc/object_store_conf.xml.j2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ansible/templates/idc/tpv/idc.yaml.j2:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | destinations:
4 | jetstream2:
5 | runner: slurm
6 | max_accepted_cores: 64
7 | max_accepted_mem: 244
8 | context:
9 | partition: priority
10 | time: 24:00:00
11 | params:
12 | native_specification: "--nodes=1 --ntasks={cores} --mem={round(mem*1024)} --time={time} --partition={partition}"
13 | tmp_dir: true
14 | outputs_to_working_directory: true
15 | singularity_enabled: true
16 | singularity_volumes: "$galaxy_root:ro,$tool_directory:ro,$working_directory:rw,$job_directory:rw,/cvmfs/main.galaxyproject.org:ro,/cvmfs/idc.galaxyproject.org:ro"
17 | singularity_default_container_id: /cvmfs/singularity.galaxyproject.org/all/python:3.8.3
18 | env:
19 | - execute: ulimit -c 0
20 | - name: _JAVA_OPTIONS
21 | value: $_JAVA_OPTIONS -Djava.io.tmpdir=$TEMP
22 | - name: SINGULARITYENV__JAVA_OPTIONS
23 | value: $_JAVA_OPTIONS
24 | - name: SINGULARITYENV_TERM
25 | value: vt100
26 | - name: SINGULARITYENV_LC_ALL
27 | value: C
28 | - name: SINGULARITYENV_TEMP
29 | value: $TEMP
30 | - name: SINGULARITYENV_TMPDIR
31 | value: $TEMP
32 |
33 | global:
34 | default_inherits: _default
35 |
36 | tools:
37 | _default:
38 | abstract: true
39 | cores: 1
40 | mem: cores * 2.89
41 | env:
42 | - name: _JAVA_OPTIONS
43 | value: $_JAVA_OPTIONS -Xmx{round(mem*0.9*1024)}m -Xms256m
44 | .*bowtie2_index_builder_data_manager.*:
45 | cores: 16
46 | .*bwa_mem_index_builder_data_manager.*:
47 | cores: 12
48 | mem: 48
49 | .*twobit_builder_data_manager.*:
50 | cores: 16
51 | mem: 36
52 | .*picard_index_builder_data_manager.*:
53 | mem: 12
54 | .*hisat2_index_builder_data_manager.*:
55 | cores: 10
56 | mem: 120
57 | .*rna_star_index_builder_data_manager.*:
58 | cores: 10
59 | mem: 120
60 | .*kraken2_build_database.*:
61 | cores: 64
62 | mem: 240
63 |
--------------------------------------------------------------------------------
/ansible/templates/nginx/idc-build.j2:
--------------------------------------------------------------------------------
1 | ##
2 | ## This file is maintained by Ansible - CHANGES WILL BE OVERWRITTEN
3 | ##
4 |
5 | upstream galaxy {
6 | server 127.0.0.1:8080;
7 | }
8 |
9 | server {
10 | listen *:80 default_server;
11 | server_name idc-build.galaxyproject.org;
12 |
13 | access_log syslog:server=unix:/dev/log;
14 | error_log syslog:server=unix:/dev/log;
15 |
16 | location / {
17 | proxy_pass http://galaxy;
18 | proxy_redirect off;
19 | proxy_set_header Host $host;
20 | proxy_set_header X-Real-IP $remote_addr;
21 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
22 | proxy_set_header X-Forwarded-Proto $scheme;
23 | }
24 |
25 | location /static {
26 | alias {{ galaxy_server_dir }}/static;
27 | }
28 |
29 | location /robots.txt {
30 | alias {{ galaxy_server_dir }}/static/robots.txt;
31 | }
32 |
33 | location /favicon.ico {
34 | alias {{ galaxy_server_dir }}/static/favicon.ico;
35 | }
36 |
37 | location /_x_accel_redirect {
38 | internal;
39 | alias /;
40 | add_header X-Frame-Options SAMEORIGIN;
41 | add_header X-Content-Type-Options nosniff;
42 | }
43 |
44 | location /jetstream2/scratch/idc/objects {
45 | internal;
46 | alias /jetstream2/scratch/idc/objects;
47 | }
48 | }
49 |
50 | # vim: set filetype=nginx :
51 |
--------------------------------------------------------------------------------
/config/tool_data_table_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | value, dbkey, name, path
4 |
5 |
6 |
7 | value, name, len_path
8 |
9 |
10 |
11 | value, dbkey, name, path
12 |
13 |
14 |
15 | value, dbkey, name, path
16 |
17 |
18 |
19 | value, path
20 |
21 |
22 |
23 | value, name, path
24 |
25 |
26 |
27 | type, value, path
28 |
29 |
30 |
31 | value, dbkey, name, path
32 |
33 |
34 |
35 | value, dbkey, name, path
36 |
37 |
38 |
39 | value, dbkey, name, path
40 |
41 |
42 |
43 | value, dbkey, name, path, with_gene_model, version
44 |
45 |
46 |
47 | value, dbkey, name, path
48 |
49 |
50 |
51 | value, dbkey, name, path
52 |
53 |
54 |
55 | key, version, value, name, path
56 |
57 |
58 |
59 | key, version, genome, value, name
60 |
61 |
62 |
63 | key, version, value, name
64 |
65 |
66 |
67 | value, name, path, description
68 |
69 |
70 |
71 | value, name, path
72 |
73 |
74 |
75 | value, name, path
76 |
77 |
78 |
79 | value, dbkey, version, name, path
80 |
81 |
82 |
83 | value, name, path
84 |
85 |
86 |
87 | value, name, path
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/cvmfs_current_managed/cvmfs_data_managers.yml:
--------------------------------------------------------------------------------
1 | tools:
2 | - name: data_manager_manual
3 | owner: iuc
4 | revisions:
5 | - 6524e573d9c2
6 | tool_panel_section_label: None
7 | tool_shed_url: toolshed.g2.bx.psu.edu
8 |
9 | - name: data_manager_bowtie2_index_builder
10 | owner: devteam
11 | revisions:
12 | - e87aeff2cf88
13 | - 83da94c0e4a6
14 | tool_panel_section_label: None
15 | tool_shed_url: toolshed.g2.bx.psu.edu
16 | tags:
17 | - genome
18 |
19 | - name: data_manager_gatk_picard_index_builder
20 | owner: devteam
21 | revisions:
22 | - b31f1fcb203c
23 | tool_panel_section_label: None
24 | tool_shed_url: toolshed.g2.bx.psu.edu
25 | tags:
26 | - genome
27 |
28 | - name: data_manager_hisat_index_builder
29 | owner: devteam
30 | revisions:
31 | - ba11fef120cd
32 | tool_panel_section_label: None
33 | tool_shed_url: toolshed.g2.bx.psu.edu
34 | tags:
35 | - genome
36 |
37 | - name: data_manager_fetch_genome_dbkeys_all_fasta
38 | owner: devteam
39 | revisions:
40 | - b1bc53e9bbc5
41 | - 776bb1b478a0
42 | tool_panel_section_label: None
43 | tool_shed_url: toolshed.g2.bx.psu.edu
44 | tags:
45 | - fetch_source
46 |
47 | - name: data_manager_snpeff
48 | owner: iuc
49 | revisions:
50 | - a6400027d849
51 | - 9ac823a8b328
52 | tool_panel_section_label: None
53 | tool_shed_url: toolshed.g2.bx.psu.edu
54 | tags:
55 | - snpeff
56 |
57 | - name: data_manager_plant_tribes_scaffolds_downloader
58 | owner: iuc
59 | revisions:
60 | - 5833ef61c1f8
61 | tool_panel_section_label: None
62 | tool_shed_url: toolshed.g2.bx.psu.edu
63 | tags:
64 | - plant_source
65 |
66 | - name: data_manager_twobit_builder
67 | owner: devteam
68 | revisions:
69 | - 74b09c8e5f6e
70 | - 9946bc39c834
71 | tool_panel_section_label: None
72 | tool_shed_url: toolshed.g2.bx.psu.edu
73 | tags:
74 | - genome
75 |
76 | - name: data_manager_diamond_database_builder
77 | owner: bgruening
78 | revisions:
79 | - ce62d0912b10
80 | tool_panel_section_label: None
81 | tool_shed_url: toolshed.g2.bx.psu.edu
82 | tags:
83 | - genome
84 |
85 | - name: data_manager_fetch_ncbi_taxonomy
86 | owner: devteam
87 | revisions:
88 | - 926847693e4d
89 | tool_panel_section_label: None
90 | tool_shed_url: toolshed.g2.bx.psu.edu
91 | tags:
92 | - tax_source
93 |
94 | - name: data_manager_picard_index_builder
95 | owner: devteam
96 | revisions:
97 | - b99040168706
98 | - 00491eabe22b
99 | tool_panel_section_label: None
100 | tool_shed_url: toolshed.g2.bx.psu.edu
101 | tags:
102 | - genome
103 |
104 | - name: data_manager_gemini_database_downloader
105 | owner: iuc
106 | revisions:
107 | - f57426daa04d
108 | tool_panel_section_label: None
109 | tool_shed_url: toolshed.g2.bx.psu.edu
110 | tags:
111 | - gemini
112 |
113 | - name: data_manager_snpeff
114 | owner: iuc
115 | revisions:
116 | - a6e6e8415b7f
117 | tool_panel_section_label: None
118 | tool_shed_url: testtoolshed.g2.bx.psu.edu
119 | tags:
120 | - snpeff
121 |
122 | - name: data_manager_bwa_mem_index_builder
123 | owner: devteam
124 | revisions:
125 | - 46066df8813d
126 | - cb0147ade868
127 | tool_panel_section_label: None
128 | tool_shed_url: toolshed.g2.bx.psu.edu
129 | tags:
130 | - genome
131 |
132 | - name: data_manager_fetch_genome_all_fasta
133 | owner: devteam
134 | revisions:
135 | - fb744a070bee
136 | tool_panel_section_label: None
137 | tool_shed_url: toolshed.g2.bx.psu.edu
138 | tags:
139 | - deprecated
140 |
141 | - name: data_manager_sam_fasta_index_builder
142 | owner: devteam
143 | revisions:
144 | - 2a1ac1abc3f7
145 | tool_panel_section_label: None
146 | tool_shed_url: toolshed.g2.bx.psu.edu
147 | tags:
148 | - genome
149 |
150 | - name: data_manager_hisat2_index_builder
151 | owner: iuc
152 | revisions:
153 | - d210e1f185bd
154 | - 98a60a4cfb9a
155 | tool_panel_section_label: None
156 | tool_shed_url: toolshed.g2.bx.psu.edu
157 | tags:
158 | - genome
159 |
160 | - name: data_manager_star_index_builder
161 | owner: iuc
162 | revisions:
163 | - 6ef6520f14fc
164 | - 50ca9af6db2e
165 | tool_panel_section_label: None
166 | tool_shed_url: toolshed.g2.bx.psu.edu
167 | tags:
168 | - genome
169 |
170 | - name: data_manager_bowtie_index_builder
171 | owner: iuc
172 | revisions:
173 | - 86e9af693a33
174 | - 35c1e3785c90
175 | tool_panel_section_label: None
176 | tool_shed_url: toolshed.g2.bx.psu.edu
177 | tags:
178 | - genome
179 |
180 | - name: data_manager_kallisto_index_builder
181 | owner: iuc
182 | revisions:
183 | - 6843a0db2da0
184 | tool_panel_section_label: None
185 | tool_shed_url: toolshed.g2.bx.psu.edu
186 | tags:
187 | - genome
188 |
--------------------------------------------------------------------------------
/cvmfs_current_managed/cvmfs_managed_genomes.yml:
--------------------------------------------------------------------------------
1 | - genomes:
2 | - dbkey: hg19_rCRS_pUC18_phiX174
3 | description: Homo sapiens (hg19 with mtDNA replaced with rCRS, and containing pUC18
4 | and phiX174)
5 | id: hg19_rCRS_pUC18_phiX174
6 | indexers:
7 | - data_manager_twobit_builder
8 | - data_manager_fetch_genome_all_fasta
9 | - data_manager_bowtie2_index_builder
10 | - data_manager_bwa_mem_index_builder
11 | - data_manager_sam_fasta_index_builder
12 | - data_manager_hisat2_index_builder
13 | - data_manager_picard_index_builder
14 | - data_manager_star_index_builder
15 |
16 | - dbkey: rn6
17 | description: Rat Jul. 2014 (RGSC 6.0/rn6) (rn6)
18 | id: rn6
19 | indexers:
20 | - data_manager_twobit_builder
21 | - data_manager_fetch_genome_all_fasta
22 | - data_manager_bowtie2_index_builder
23 | - data_manager_bwa_mem_index_builder
24 | - data_manager_sam_fasta_index_builder
25 | - data_manager_hisat2_index_builder
26 | - data_manager_picard_index_builder
27 | - data_manager_star_index_builder
28 |
29 | - dbkey: dm6
30 | description: D. melanogaster Aug. 2014 (BDGP Release 6 + ISO1 MT/dm6) (dm6)
31 | id: dm6
32 | indexers:
33 | - data_manager_twobit_builder
34 | - data_manager_fetch_genome_all_fasta
35 | - data_manager_bowtie2_index_builder
36 | - data_manager_bwa_mem_index_builder
37 | - data_manager_sam_fasta_index_builder
38 | - data_manager_hisat2_index_builder
39 | - data_manager_picard_index_builder
40 | - data_manager_star_index_builder
41 |
42 | - dbkey: musFur1
43 | description: Ferret Apr. 2011 (MusPutFur1.0/musFur1) (musFur1)
44 | id: musFur1
45 | indexers:
46 | - data_manager_twobit_builder
47 | - data_manager_fetch_genome_all_fasta
48 | - data_manager_bowtie2_index_builder
49 | - data_manager_bwa_mem_index_builder
50 | - data_manager_sam_fasta_index_builder
51 | - data_manager_hisat2_index_builder
52 | - data_manager_picard_index_builder
53 |
54 | - dbkey: nomLeu3
55 | description: Gibbon Oct. 2012 (GGSC Nleu3.0/nomLeu3) (nomLeu3)
56 | id: nomLeu3
57 | indexers:
58 | - data_manager_twobit_builder
59 | - data_manager_fetch_genome_all_fasta
60 | - data_manager_bowtie2_index_builder
61 | - data_manager_bwa_mem_index_builder
62 | - data_manager_sam_fasta_index_builder
63 | - data_manager_hisat2_index_builder
64 | - data_manager_picard_index_builder
65 |
66 | - dbkey: cerSim1
67 | description: White rhinoceros May 2012 (CerSimSim1.0/cerSim1) (cerSim1)
68 | id: cerSim1
69 | indexers:
70 | - data_manager_twobit_builder
71 | - data_manager_fetch_genome_all_fasta
72 | - data_manager_bowtie2_index_builder
73 | - data_manager_bwa_mem_index_builder
74 | - data_manager_sam_fasta_index_builder
75 | - data_manager_hisat2_index_builder
76 | - data_manager_picard_index_builder
77 |
78 | - dbkey: danRer10
79 | description: Zebrafish Sep. 2014 (GRCz10/danRer10) (danRer10)
80 | id: danRer10
81 | indexers:
82 | - data_manager_twobit_builder
83 | - data_manager_fetch_genome_all_fasta
84 | - data_manager_bowtie2_index_builder
85 | - data_manager_sam_fasta_index_builder
86 | - data_manager_picard_index_builder
87 |
88 | - dbkey: papAnu2
89 | description: Baboon Mar. 2012 (Baylor Panu_2.0/papAnu2) (papAnu2)
90 | id: papAnu2
91 | indexers:
92 | - data_manager_twobit_builder
93 | - data_manager_fetch_genome_all_fasta
94 | - data_manager_bowtie2_index_builder
95 | - data_manager_bwa_mem_index_builder
96 | - data_manager_sam_fasta_index_builder
97 | - data_manager_hisat2_index_builder
98 | - data_manager_picard_index_builder
99 |
100 | - dbkey: bosTau8
101 | description: Cow Jun. 2014 (Bos_taurus_UMD_3.1.1/bosTau8) (bosTau8)
102 | id: bosTau8
103 | indexers:
104 | - data_manager_twobit_builder
105 | - data_manager_fetch_genome_all_fasta
106 | - data_manager_bowtie2_index_builder
107 | - data_manager_bwa_mem_index_builder
108 | - data_manager_sam_fasta_index_builder
109 | - data_manager_hisat2_index_builder
110 | - data_manager_picard_index_builder
111 |
112 | - dbkey: melUnd1
113 | description: 'Budgerigar (Melopsittacus undulatus): melUnd1'
114 | id: melUnd1
115 | indexers:
116 | - data_manager_twobit_builder
117 | - data_manager_bowtie2_index_builder
118 | - data_manager_bwa_mem_index_builder
119 | - data_manager_hisat2_index_builder
120 |
121 | - dbkey: allMis1
122 | description: American alligator Aug. 2012 (allMis0.2/allMis1) (allMis1)
123 | id: allMis1
124 | indexers:
125 | - data_manager_twobit_builder
126 | - data_manager_fetch_genome_all_fasta
127 | - data_manager_bowtie2_index_builder
128 | - data_manager_bwa_mem_index_builder
129 | - data_manager_sam_fasta_index_builder
130 | - data_manager_hisat2_index_builder
131 | - data_manager_picard_index_builder
132 |
133 | - dbkey: vicPac1
134 | description: Alpaca Jul. 2008 (Broad/vicPac1) (vicPac1)
135 | id: vicPac1
136 | indexers:
137 | - data_manager_twobit_builder
138 | - data_manager_fetch_genome_all_fasta
139 | - data_manager_bowtie2_index_builder
140 | - data_manager_bwa_mem_index_builder
141 | - data_manager_sam_fasta_index_builder
142 | - data_manager_hisat2_index_builder
143 | - data_manager_picard_index_builder
144 |
145 | - dbkey: vicPac2
146 | description: Alpaca Mar. 2013 (Vicugna_pacos-2.0.1/vicPac2) (vicPac2)
147 | id: vicPac2
148 | indexers:
149 | - data_manager_twobit_builder
150 | - data_manager_fetch_genome_all_fasta
151 | - data_manager_bowtie2_index_builder
152 | - data_manager_bwa_mem_index_builder
153 | - data_manager_sam_fasta_index_builder
154 | - data_manager_hisat2_index_builder
155 | - data_manager_picard_index_builder
156 |
157 | - dbkey: gadMor1
158 | description: Atlantic cod May 2010 (Genofisk GadMor_May2010/gadMor1) (gadMor1)
159 | id: gadMor1
160 | indexers:
161 | - data_manager_twobit_builder
162 | - data_manager_fetch_genome_all_fasta
163 | - data_manager_bowtie2_index_builder
164 | - data_manager_bwa_mem_index_builder
165 | - data_manager_sam_fasta_index_builder
166 | - data_manager_hisat2_index_builder
167 | - data_manager_picard_index_builder
168 |
169 | - dbkey: dasNov3
170 | description: Armadillo Dec. 2011 (Baylor/dasNov3) (dasNov3)
171 | id: dasNov3
172 | indexers:
173 | - data_manager_twobit_builder
174 | - data_manager_fetch_genome_all_fasta
175 | - data_manager_bowtie2_index_builder
176 | - data_manager_bwa_mem_index_builder
177 | - data_manager_sam_fasta_index_builder
178 | - data_manager_hisat2_index_builder
179 | - data_manager_picard_index_builder
180 |
181 | - dbkey: panPan1
182 | description: Bonobo May. 2012 (Max-Planck/panPan1) (panPan1)
183 | id: panPan1
184 | indexers:
185 | - data_manager_twobit_builder
186 | - data_manager_fetch_genome_all_fasta
187 | - data_manager_bowtie2_index_builder
188 | - data_manager_bwa_mem_index_builder
189 | - data_manager_sam_fasta_index_builder
190 | - data_manager_hisat2_index_builder
191 | - data_manager_picard_index_builder
192 |
193 | - dbkey: felCat8
194 | description: Cat Nov. 2014 (ICGSC Felis_catus_8.0/felCat8) (felCat8)
195 | id: felCat8
196 | indexers:
197 | - data_manager_twobit_builder
198 | - data_manager_fetch_genome_all_fasta
199 | - data_manager_bowtie2_index_builder
200 | - data_manager_bwa_mem_index_builder
201 | - data_manager_sam_fasta_index_builder
202 | - data_manager_hisat2_index_builder
203 | - data_manager_picard_index_builder
204 |
205 | - dbkey: aptMan1
206 | description: Brown Kiwi Jun. 2015 (MPI-EVA AptMant0/aptMan1) (aptMan1)
207 | id: aptMan1
208 | indexers:
209 | - data_manager_twobit_builder
210 | - data_manager_fetch_genome_all_fasta
211 | - data_manager_bowtie2_index_builder
212 | - data_manager_bwa_mem_index_builder
213 | - data_manager_sam_fasta_index_builder
214 | - data_manager_hisat2_index_builder
215 | - data_manager_picard_index_builder
216 |
217 | - dbkey: bosTau7
218 | description: 'Cow (Bos taurus): bosTau7'
219 | id: bosTau7
220 | indexers:
221 | - data_manager_twobit_builder
222 | - data_manager_hisat2_index_builder
223 |
224 | - dbkey: apiMel4
225 | description: A. mellifera 04 Nov 2010 (Amel_4.5/apiMel4) (apiMel4)
226 | id: apiMel4
227 | indexers:
228 | - data_manager_twobit_builder
229 | - data_manager_fetch_genome_all_fasta
230 | - data_manager_bowtie2_index_builder
231 | - data_manager_bwa_mem_index_builder
232 | - data_manager_sam_fasta_index_builder
233 | - data_manager_hisat2_index_builder
234 | - data_manager_picard_index_builder
235 | - data_manager_star_index_builder
236 |
237 | - dbkey: Amel_4.5
238 | description: A. mellifera Nov. 2010 (GCF_000002195.4/Amel_4.5) (Amel_4.5)
239 | id: Amel_4.5
240 | indexers:
241 | - data_manager_twobit_builder
242 | - data_manager_fetch_genome_all_fasta
243 | - data_manager_bowtie2_index_builder
244 | - data_manager_bwa_mem_index_builder
245 | - data_manager_sam_fasta_index_builder
246 | - data_manager_hisat2_index_builder
247 | - data_manager_picard_index_builder
248 | - data_manager_star_index_builder
249 |
250 | - dbkey: taeGut2
251 | description: Zebra finch Feb. 2013 (WashU taeGut324/taeGut2)
252 | id: taeGut2
253 | indexers:
254 | - data_manager_fetch_genome_all_fasta
255 |
256 | - dbkey: criGri1
257 | description: Chinese hamster Jul. 2013 (C_griseus_v1.0/criGri1) (criGri1)
258 | id: criGri1
259 | indexers:
260 | - data_manager_fetch_genome_all_fasta
261 |
262 | - dbkey: latCha1
263 | description: Coelacanth Aug. 2011 (Broad/latCha1) (latCha1)
264 | id: latCha1
265 | indexers:
266 | - data_manager_fetch_genome_all_fasta
267 |
268 | - dbkey: sacCer3
269 | description: 'Yeast (Saccharomyces cerevisiae): sacCer3'
270 | id: sacCer3
271 | indexers:
272 | - data_manager_bowtie2_index_builder
273 | - data_manager_bwa_mem_index_builder
274 | - data_manager_hisat2_index_builder
275 | - data_manager_star_index_builder
276 |
277 | - dbkey: sacCer2
278 | description: 'Yeast (Saccharomyces cerevisiae): sacCer2'
279 | id: sacCer2
280 | indexers:
281 | - data_manager_bowtie2_index_builder
282 | - data_manager_bwa_mem_index_builder
283 | - data_manager_hisat2_index_builder
284 | - data_manager_star_index_builder
285 |
286 | - dbkey: Schizosaccharomyces_pombe_1.1
287 | description: 'Fission Yeast (Schizosaccharomyces pombe): Schizosaccharomyces_pombe_1.1'
288 | id: Schizosaccharomyces_pombe_1.1
289 | indexers:
290 | - data_manager_bowtie2_index_builder
291 | - data_manager_bwa_mem_index_builder
292 | - data_manager_hisat2_index_builder
293 |
294 | - dbkey: galGal4
295 | description: Chicken (Nov 2011, Gallus gallus)
296 | id: galgal4
297 | indexers:
298 | - data_manager_bowtie2_index_builder
299 |
300 | - dbkey: loxAfr1
301 | description: 'Elephant (Loxodonta africana africana): loxAfr1'
302 | id: loxAfr1
303 | indexers:
304 | - data_manager_bowtie2_index_builder
305 | - data_manager_bwa_mem_index_builder
306 | - data_manager_hisat2_index_builder
307 |
308 | - dbkey: loxAfr3
309 | description: 'Elephant (Loxodonta africana africana): loxAfr3'
310 | id: loxAfr3
311 | indexers:
312 | - data_manager_bowtie2_index_builder
313 | - data_manager_bwa_mem_index_builder
314 | - data_manager_hisat2_index_builder
315 |
316 | - dbkey: oryza_sativa_japonica_nipponbare_IRGSP4.0
317 | description: 'Rice (Oryza sativa L. ssp. japonica var. Nipponbare): IRGSP4.0'
318 | id: oryza_sativa_japonica_nipponbare_IRGSP4.0
319 | indexers:
320 | - data_manager_bowtie2_index_builder
321 |
322 | - dbkey: melGal1
323 | description: 'Turkey (Meleagris gallopavo): melGal1'
324 | id: melGal1
325 | indexers:
326 | - data_manager_bowtie2_index_builder
327 | - data_manager_bwa_mem_index_builder
328 | - data_manager_hisat2_index_builder
329 |
330 | - dbkey: equCab1
331 | description: 'Horse (Equus caballus): equCab1'
332 | id: equCab1
333 | indexers:
334 | - data_manager_bowtie2_index_builder
335 | - data_manager_bwa_mem_index_builder
336 | - data_manager_hisat2_index_builder
337 |
338 | - dbkey: rheMac2
339 | description: 'Rhesus Macaque (Macaca mulatta): rheMac2'
340 | id: rheMac2
341 | indexers:
342 | - data_manager_bowtie2_index_builder
343 | - data_manager_bwa_mem_index_builder
344 | - data_manager_hisat2_index_builder
345 |
346 | - dbkey: galGal4
347 | description: 'Chicken (Gallus gallus): galGal4'
348 | id: galGal4
349 | indexers:
350 | - data_manager_bowtie2_index_builder
351 | - data_manager_bwa_mem_index_builder
352 | - data_manager_hisat2_index_builder
353 |
354 | - dbkey: equCab2
355 | description: 'Horse (Equus caballus): equCab2'
356 | id: equCab2
357 | indexers:
358 | - data_manager_bowtie2_index_builder
359 | - data_manager_bwa_mem_index_builder
360 | - data_manager_hisat2_index_builder
361 |
362 | - dbkey: rheMac3
363 | description: 'Rhesus Macaque (Macaca mulatta): rheMac3'
364 | id: rheMac3
365 | indexers:
366 | - data_manager_bowtie2_index_builder
367 | - data_manager_bwa_mem_index_builder
368 | - data_manager_hisat2_index_builder
369 |
370 | - dbkey: eschColi_K12
371 | description: 'Escherichia coli (str. K-12 substr. MG1655): eschColi_K12'
372 | id: eschColi_K12
373 | indexers:
374 | - data_manager_bowtie2_index_builder
375 | - data_manager_bwa_mem_index_builder
376 | - data_manager_hisat2_index_builder
377 |
378 | - dbkey: galGal3
379 | description: 'Chicken (Gallus gallus): galGal3 Canonical'
380 | id: galGal3canon
381 | indexers:
382 | - data_manager_bowtie2_index_builder
383 | - data_manager_bwa_mem_index_builder
384 |
385 | - dbkey: galGal3
386 | description: 'Chicken (Gallus gallus): galGal3 Full'
387 | id: galGal3full
388 | indexers:
389 | - data_manager_bowtie2_index_builder
390 | - data_manager_bwa_mem_index_builder
391 |
392 | - dbkey: canFam3
393 | description: 'Dog (Canis lupus familiaris): canFam3'
394 | id: canFam3
395 | indexers:
396 | - data_manager_bwa_mem_index_builder
397 | - data_manager_hisat2_index_builder
398 |
399 | - dbkey: ce9
400 | description: 'C. elegans (WS210): ce9'
401 | id: ce9
402 | indexers:
403 | - data_manager_bwa_mem_index_builder
404 | - data_manager_hisat2_index_builder
405 | - data_manager_star_index_builder
406 |
407 | - dbkey: canFam2
408 | description: 'Dog (Canis lupus familiaris): canFam2'
409 | id: canFam2
410 | indexers:
411 | - data_manager_bwa_mem_index_builder
412 | - data_manager_hisat2_index_builder
413 |
414 | - dbkey: susScr2
415 | description: 'Pig (Sus scrofa): susScr2'
416 | id: susScr2
417 | indexers:
418 | - data_manager_bwa_mem_index_builder
419 | - data_manager_hisat2_index_builder
420 |
421 | - dbkey: bosTauMd3
422 | description: 'Cow (Bos taurus): bosTauMd3'
423 | id: bosTauMd3
424 | indexers:
425 | - data_manager_bwa_mem_index_builder
426 | - data_manager_hisat2_index_builder
427 |
428 | - dbkey: papHam1
429 | description: 'Baboon (Papio anubis): papHam1'
430 | id: papHam1
431 | indexers:
432 | - data_manager_bwa_mem_index_builder
433 | - data_manager_hisat2_index_builder
434 |
435 | - dbkey: otoGar3
436 | description: 'Bushbaby (Otolemur garnetti): otoGar3'
437 | id: otoGar3
438 | indexers:
439 | - data_manager_bwa_mem_index_builder
440 | - data_manager_hisat2_index_builder
441 |
442 | - dbkey: felCat5
443 | description: 'Cat (Felis catus): felCat5'
444 | id: felCat5
445 | indexers:
446 | - data_manager_bwa_mem_index_builder
447 | - data_manager_hisat2_index_builder
448 |
449 | - dbkey: otoGar1
450 | description: 'Bushbaby (Otolemur garnetti): otoGar1'
451 | id: otoGar1
452 | indexers:
453 | - data_manager_bwa_mem_index_builder
454 | - data_manager_hisat2_index_builder
455 |
456 | - dbkey: panTro4
457 | description: 'Chimpanzee (Pan troglodytes): panTro4'
458 | id: panTro4
459 | indexers:
460 | - data_manager_bwa_mem_index_builder
461 | - data_manager_hisat2_index_builder
462 |
463 | - dbkey: turTru2
464 | description: 'Dolphin (Tursiops truncatus): turTru2'
465 | id: turTru2
466 | indexers:
467 | - data_manager_bwa_mem_index_builder
468 | - data_manager_hisat2_index_builder
469 |
470 | - dbkey: panTro3
471 | description: 'Chimpanzee (Pan troglodytes): panTro3 Canonical'
472 | id: panTro3canon
473 | indexers:
474 | - data_manager_bwa_mem_index_builder
475 | - data_manager_hisat2_index_builder
476 |
477 | - dbkey: panTro3
478 | description: 'Chimpanzee (Pan troglodytes): panTro3 Full'
479 | id: panTro3full
480 | indexers:
481 | - data_manager_bwa_mem_index_builder
482 | - data_manager_hisat2_index_builder
483 |
484 | - dbkey: Araly1
485 | description: 'Arabidopsis lyrata: Araly1'
486 | id: Araly1
487 | indexers:
488 | - data_manager_sam_fasta_index_builder
489 |
490 | - dbkey: dm3
491 | description: 'Fruit Fly (Drosophila melanogaster): dm3'
492 | id: dm3
493 | indexers:
494 | - data_manager_hisat2_index_builder
495 | - data_manager_star_index_builder
496 |
497 | - dbkey: mm10
498 | description: 'Mouse (Mus Musculus): mm10'
499 | id: mm10
500 | indexers:
501 | - data_manager_hisat2_index_builder
502 | - data_manager_star_index_builder
503 |
504 | - dbkey: galGal3
505 | description: 'Chicken (Gallus gallus): galGal3 Full'
506 | id: galGal3
507 | indexers:
508 | - data_manager_hisat2_index_builder
509 |
510 | - dbkey: mm9
511 | description: 'Mouse (Mus musculus): mm9'
512 | id: mm9
513 | indexers:
514 | - data_manager_hisat2_index_builder
515 | - data_manager_star_index_builder
516 |
517 | - dbkey: hg_g1k_v37
518 | description: 'Human (Homo sapiens) (b37): hg_g1k_v37'
519 | id: hg_g1k_v37
520 | indexers:
521 | - data_manager_hisat2_index_builder
522 |
523 | - dbkey: susScr3
524 | description: 'Pig (Sus scrofa): susScr3'
525 | id: susScr3
526 | indexers:
527 | - data_manager_hisat2_index_builder
528 |
529 | - dbkey: ce10
530 | description: 'C. elegans (WS220): ce10'
531 | id: ce10
532 | indexers:
533 | - data_manager_hisat2_index_builder
534 | - data_manager_star_index_builder
535 |
536 | - dbkey: hg19
537 | description: 'Human (Homo sapiens) (b37): hg19'
538 | id: hg19
539 | indexers:
540 | - data_manager_hisat2_index_builder
541 | - data_manager_star_index_builder
542 |
543 | - dbkey: hg19
544 | description: 'Human (Homo sapiens) (b37): hg19 Canonical Female'
545 | id: hg19female
546 | indexers:
547 | - data_manager_hisat2_index_builder
548 |
549 | - dbkey: hg19
550 | description: 'Human (Homo sapiens) (b37): hg19 Canonical'
551 | id: hg19canon
552 | indexers:
553 | - data_manager_hisat2_index_builder
554 |
555 | - dbkey: hg38
556 | description: 'Human (Homo sapiens) (b38): hg38 Canonical Female'
557 | id: hg38female
558 | indexers:
559 | - data_manager_hisat2_index_builder
560 |
561 | - dbkey: hg38
562 | description: 'Human (Homo sapiens) (b38): hg38 Canonical'
563 | id: hg38canon
564 | indexers:
565 | - data_manager_hisat2_index_builder
566 |
567 | - dbkey: hg38
568 | description: 'Human (Homo sapiens) (b38): hg38'
569 | id: hg38
570 | indexers:
571 | - data_manager_hisat2_index_builder
572 | - data_manager_star_index_builder
573 |
574 | - description: NCBI-2015-10-05
575 | id: ncbi-2015-10-05
576 | indexers:
577 | - data_manager_fetch_ncbi_taxonomy
578 |
579 | - description: 22Gv1.1
580 | id: 22Gv1.1
581 | indexers:
582 | - data_manager_plant_tribes_scaffolds_downloader
583 |
584 | - description: 26 plant genomes (Angiosperms clusters, version 2.0)
585 | id: 26Gv2.0
586 | indexers:
587 | - data_manager_plant_tribes_scaffolds_downloader
588 |
589 | - description: 37 plant genomes (Angiosperms clusters, version 1.0)
590 | id: 37Gv1.0
591 | indexers:
592 | - data_manager_plant_tribes_scaffolds_downloader
593 |
594 | - dbkey: rn5
595 | description: 'Rat (Rattus norvegicus): rn5'
596 | id: rn5
597 | indexers:
598 | - data_manager_star_index_builder
599 |
--------------------------------------------------------------------------------
/data_managers.yml:
--------------------------------------------------------------------------------
1 | data_manager_fetch_genome_dbkeys_all_fasta:
2 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_dbkeys_all_fasta/data_manager_fetch_genome_all_fasta_dbkey/0.0.4'
3 | tags:
4 | - fetch_source
5 | data_manager_bowtie2_index_builder:
6 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bowtie2_index_builder/bowtie2_index_builder_data_manager/2.3.4.3'
7 | tags:
8 | - genome
9 | data_manager_bwa_mem_index_builder:
10 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bwa_mem_index_builder/bwa_mem_index_builder_data_manager/0.0.3'
11 | tags:
12 | - genome
13 | parameters:
14 | index_algorithm: bwtsw
15 | data_manager_hisat_index_builder:
16 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_hisat_index_builder/hisat_index_builder_data_manager/1.0.0'
17 | tags:
18 | - genome
19 | data_manager_twobit_builder:
20 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2'
21 | tags:
22 | - genome
23 | data_manager_picard_index_builder:
24 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_picard_index_builder/picard_index_builder_data_manager/2.7.1'
25 | tags:
26 | - genome
27 | data_manager_sam_fasta_index_builder:
28 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_sam_fasta_index_builder/sam_fasta_index_builder/0.0.2'
29 | tags:
30 | - genome
31 | data_manager_hisat2_index_builder:
32 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_hisat2_index_builder/hisat2_index_builder_data_manager/2.0.5'
33 | tags:
34 | - genome
35 | data_manager_star_index_builder:
36 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_star_index_builder/rna_star_index_builder_data_manager/2.7.4a+galaxy1'
37 | tags:
38 | - genome
39 | data_manager_bowtie_index_builder:
40 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_bowtie_index_builder/bowtie_color_space_index_builder_data_manager/0.0.2'
41 | tags:
42 | - genome
43 | data_manager_kallisto_index_builder:
44 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_kallisto_index_builder/kallisto_index_builder_data_manager/0.43.1'
45 | tags:
46 | - genome
47 | data_manager_snpeff:
48 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_snpeff/data_manager_snpeff_databases/4.3r'
49 | tags:
50 | - snpeff
51 | data_manager_plant_tribes_scaffolds_downloader:
52 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_plant_tribes_scaffolds_downloader/data_manager_plant_tribes_scaffolds_download/1.1.0'
53 | tags:
54 | - plant_source
55 | data_manager_fetch_ncbi_taxonomy:
56 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_ncbi_taxonomy/ncbi_taxonomy_fetcher/1.0.0'
57 | tags:
58 | - tax_source
59 | data_manager_gemini_database_downloader:
60 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_gemini_database_downloader/data_manager_gemini_download/0.20.1'
61 | tags:
62 | - gemini
63 | data_manager_build_kraken2_database:
64 | tool_id: toolshed.g2.bx.psu.edu/repos/iuc/data_manager_build_kraken2_database/kraken2_build_database/2.1.2+galaxy1
65 | tags:
66 | - kraken2
67 | data_manager_build_bracken_database:
68 | tool_id: toolshed.g2.bx.psu.edu/repos/iuc/data_manager_build_bracken_database/bracken_build_database/2.8+galaxy0
69 | parameters:
70 | kraken_db: "{{ item.id }}"
71 | check_prebuilt:
72 | prebuilt: yes
73 | tags:
74 | - bracken
75 |
--------------------------------------------------------------------------------
/genomes.yml:
--------------------------------------------------------------------------------
1 | genomes:
2 | - dbkey: dm6
3 | description: # set from UCSC
4 | id: dm6
5 | source: ucsc
6 | doi:
7 | version:
8 | checksum:
9 | blob:
10 | indexers:
11 | - data_manager_bowtie2_index_builder
12 | - data_manager_bwa_mem_index_builder
13 | - data_manager_twobit_builder
14 | - data_manager_picard_index_builder
15 | - data_manager_sam_fasta_index_builder
16 | - data_manager_hisat2_index_builder
17 | - data_manager_star_index_builder
18 | #- data_manager_bowtie_index_builder
19 | #- data_manager_kallisto_index_builder
20 | skiplist:
21 | - bfast
22 | - dbkey: danRer10
23 | description: # set from UCSC
24 | id: danRer10
25 | source: ucsc
26 | doi:
27 | version:
28 | checksum:
29 | blob:
30 | indexers:
31 | - data_manager_bowtie2_index_builder
32 | - data_manager_bwa_mem_index_builder
33 | - data_manager_twobit_builder
34 | - data_manager_picard_index_builder
35 | - data_manager_sam_fasta_index_builder
36 | - data_manager_hisat2_index_builder
37 | - data_manager_star_index_builder
38 | #- data_manager_bowtie_index_builder
39 | #- data_manager_kallisto_index_builder
40 | skiplist:
41 | - bfast
42 | - dbkey: sacCer3
43 | description: # set from UCSC
44 | id: sacCer3
45 | source: ucsc
46 | doi:
47 | version:
48 | checksum:
49 | blob:
50 | indexers:
51 | - data_manager_bowtie2_index_builder
52 | - data_manager_bwa_mem_index_builder
53 | - data_manager_twobit_builder
54 | - data_manager_picard_index_builder
55 | - data_manager_sam_fasta_index_builder
56 | - data_manager_hisat2_index_builder
57 | - data_manager_star_index_builder
58 | #- data_manager_bowtie_index_builder
59 | #- data_manager_kallisto_index_builder
60 | skiplist:
61 | - bfast
62 | - dbkey: Ecoli-O157-H7-Sakai
63 | description: "Escherichia coli O157-H7 Sakai"
64 | id: Ecoli-O157-H7-Sakai
65 | source: https://swift.rc.nectar.org.au:8888/v1/AUTH_377/public/COMP90014/Assignment1/Ecoli-O157_H7-Sakai-chr.fna
66 | doi:
67 | version:
68 | checksum: 1d769fcb47f631c359e0b9407155e34325b223fba4d1f208f7ad8c353f5ab560
69 | blob:
70 | indexers:
71 | - data_manager_bowtie2_index_builder
72 | - data_manager_bwa_mem_index_builder
73 | - data_manager_twobit_builder
74 | - data_manager_picard_index_builder
75 | - data_manager_sam_fasta_index_builder
76 | - data_manager_hisat2_index_builder
77 | - data_manager_star_index_builder
78 | #- data_manager_bowtie_index_builder
79 | #- data_manager_kallisto_index_builder
80 | skiplist:
81 | - bfast
82 | - dbkey: Salm-enterica-Newport
83 | description: "Salmonella enterica subsp. enterica serovar Newport str. USMARC-S3124.1"
84 | id: Salm-enterica-Newport
85 | source: NC_021902
86 | doi:
87 | version:
88 | checksum: 8d557e48aa3268afd0177a537a9ae396e6a6c123e4e0d5386bc1fa5a18c3054b
89 | blob: "Extra information for this genome is at: https://www.ncbi.nlm.nih.gov/genome/152?genome_assembly_id=299243"
90 | indexers:
91 | - data_manager_bowtie2_index_builder
92 | - data_manager_bwa_mem_index_builder
93 | - data_manager_twobit_builder
94 | - data_manager_picard_index_builder
95 | - data_manager_sam_fasta_index_builder
96 | - data_manager_hisat2_index_builder
97 | - data_manager_star_index_builder
98 | #- data_manager_bowtie_index_builder
99 | #- data_manager_kallisto_index_builder
100 | skiplist:
101 | - bfast
102 |
--------------------------------------------------------------------------------
/run_builder.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build IDC reference data inside a dockerized Galaxy:
#   start Galaxy -> install data manager tools -> fetch genomes ->
#   restart Galaxy (reload data tables) -> build the indices.

set -e

# All of these can be overridden from the environment.
: ${GALAXY_DOCKER_IMAGE:="quay.io/bgruening/galaxy"}
: ${GALAXY_PORT:="8080"}
: ${GALAXY_DEFAULT_ADMIN_USER:="admin@galaxy.org"}
: ${GALAXY_DEFAULT_ADMIN_PASSWORD:="password"}
: ${EXPORT_DIR:="/mnt/data/export/"}
: ${DATA_MANAGER_DATA_PATH:="${EXPORT_DIR}/data_manager"}

: ${PLANEMO_PROFILE_NAME:="wxflowtest"}
: ${PLANEMO_SERVE_DATABASE_TYPE:="postgres"}

GALAXY_URL="http://localhost:$GALAXY_PORT"

git diff --name-only $TRAVIS_COMMIT_RANGE -- '*.yml' '*.yaml' > changed_files.txt
echo "Following files have changed."
cat changed_files.txt

# BUG FIX: virtualenv creates a *directory*; the previous '-f' (regular file)
# test was always true, so the venv was recreated on every run. Use '-d'.
if [ ! -d .venv ]; then
    virtualenv .venv
    . .venv/bin/activate
    pip install -U pip
    pip install ephemeris
fi

echo 'ephemeris installed'

. .venv/bin/activate

mkdir -p ${DATA_MANAGER_DATA_PATH}

sudo cp scripts/job_conf.xml ${EXPORT_DIR}/job_conf.xml

docker run -d --rm -v ${EXPORT_DIR}:/export/ -e GALAXY_CONFIG_JOB_CONFIG_FILE=/export/job_conf.xml -e GALAXY_CONFIG_GALAXY_DATA_MANAGER_DATA_PATH=/export/data_manager/ -e GALAXY_CONFIG_WATCH_TOOL_DATA_DIR=True -p 8080:80 --name idc_builder ${GALAXY_DOCKER_IMAGE}

echo 'Waiting for Galaxy'

galaxy-wait -g ${GALAXY_URL}

chmod 0777 ${DATA_MANAGER_DATA_PATH}


#if [ -s changed_files.txt ]
#then
#    for FILE in `cat changed_files.txt`;
#    do
#        if [[ $FILE == *"data-managers"* ]]; then
#            #### RUN single data managers
#            shed-tools install -d $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD
#            run-data-managers --config $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD
#        elif [[ $FILE == *"idc-workflows"* ]]; then
#            #### RUN the pipline for new genome
#            shed-tools install -d $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD
#            run-data-managers --config $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD
#        fi
#    done
#fi

echo 'Installing Data Managers'
# Install the data managers
_idc-data-managers-to-tools
shed-tools install -t tools.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD

echo 'Fetching new genomes'
# Run make_fetch.py to build the fetch manager config file (fetch.yml) for ephemeris
python scripts/make_fetch.py -g genomes.yml -x ${EXPORT_DIR}/galaxy-central/config/shed_data_manager_conf.xml
#cat data_managers_fetch.yml genomes.yml > fetch.yml
run-data-managers --config fetch.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD

echo 'Restarting Galaxy'
# Restart Galaxy to reload the data tables
docker exec idc_builder supervisorctl restart galaxy:
galaxy-wait -g ${GALAXY_URL}
sleep 20

echo 'Building new indices'
# Run make_dm_genomes.py to create the list of index builders and genomes and pass it to ephemeris
python scripts/make_dm_genomes.py -d data_managers_tools.yml -x ${EXPORT_DIR}/galaxy-central/config/shed_data_manager_conf.xml -g genomes.yml
run-data-managers --config dm_genomes.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD


ls -l ${DATA_MANAGER_DATA_PATH}

# -f: do not fail (set -e) if a generated file is already gone
rm -f fetch.yml
rm -f dm_genomes.yml

docker stop idc_builder
90 |
--------------------------------------------------------------------------------
/scripts/cleanup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Tear down the IDC builder container and remove build artifacts.

docker stop idc_builder
sudo rm -rf /mnt/data/export
# run_builder.sh generates both fetch.yml and dm_genomes.yml; remove both.
# -f keeps the script quiet/idempotent if a file is already gone.
rm -f fetch.yml dm_genomes.yml
6 |
--------------------------------------------------------------------------------
/scripts/job_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | /usr/lib/slurm-drmaa/lib/libdrmaa.so
6 | true
7 |
8 |
9 | true
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | true
23 | --ntasks=32 --share
24 |
25 |
26 |
27 | true
28 | --ntasks=1 --share
29 |
30 |
31 |
32 | true
33 | --ntasks=2 --share
34 |
35 |
36 |
37 | true
38 | vanilla
39 |
40 |
41 |
42 | true
43 | vanilla
44 |
45 |
46 |
47 | 1
48 | 1
49 | 1
50 |
51 |
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/scripts/make_dm_genomes.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python
2 |
3 | import yaml
4 | import xml.etree.ElementTree as ET
5 | import re
6 | import argparse
7 |
8 |
def main():
    """Generate an ephemeris ``run-data-managers`` config (dm_genomes.yml).

    Cross-references the data manager tools listed in the tools YAML file
    (those tagged ``genome``) against the installed data managers recorded in
    ``shed_data_manager_conf.xml``, and emits one entry per index-builder
    tool with the parameters, genome list, and data tables to reload.

    Command line arguments:
        -d/--data_managers_file: data manager tools .yml file (required)
        -x/--shed_data_managers_conf_file: shed_data_manager_conf.xml (required)
        -g/--genome_file: genomes .yml file (required)
        -o/--outfile: output path (default: dm_genomes.yml)
    """
    VERSION = 0.1

    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-d",
        "--data_managers_file",
        required=True,
        help="The data managers tool .yml file.",
    )
    parser.add_argument(
        "-x",
        "--shed_data_managers_conf_file",
        required=True,
        help="Path to the shed_data_managers_conf.xml file",
    )
    parser.add_argument(
        "-g", "--genome_file", required=True, help="The genome yaml file to read."
    )
    parser.add_argument(
        "-o",
        "--outfile",
        default="dm_genomes.yml",
        help="The name of the output file to produce.",
    )
    parser.add_argument("--version", action="store_true")
    parser.add_argument("--verbose", action="store_true")

    args = parser.parse_args()

    if args.version:
        # BUG FIX: previously reported itself as "make_fetch.py" (copy-paste).
        print("make_dm_genomes.py version: %.1f" % VERSION)
        return

    # Set up the output dictionary
    out_conf = {"data_managers": [], "genomes": []}

    # Names of the data manager tools tagged as genome indexers.
    with open(args.data_managers_file, "r") as fh:
        data_managers_tools = yaml.safe_load(fh)
    dms = [dm["name"] for dm in data_managers_tools["tools"] if "genome" in dm["tags"]]
    if args.verbose:
        print("Data managers array: %s" % dms)

    # Walk shed_data_managers_conf.xml collecting each data manager's tool id,
    # repository name, and the data tables it updates; keep only those whose
    # repository appears in dms.
    tree = ET.parse(args.shed_data_managers_conf_file)
    root = tree.getroot()
    for data_manager in root:
        name = ""
        repo = ""
        tables = []
        for tool in data_manager:
            if tool.tag == "tool":
                for x in tool:
                    if x.tag == "id":
                        name = x.text
                    elif x.tag == "repository_name":
                        repo = x.text
            elif tool.tag == "data_table":
                tables.append(tool.attrib["name"])
        if repo in dms:
            dm = {
                "id": name,
                # Jinja placeholders expanded by ephemeris per genome item.
                "params": [
                    {"all_fasta_source": "{{ item.id }}"},
                    {"sequence_name": "{{ item.name }}"},
                    {"sequence_id": "{{ item.id }}"},
                ],
            }
            if "bwa" in dm["id"]:
                dm["params"].append({"index_algorithm": "bwtsw"})
            if "color_space" in dm["id"]:
                # Skip color-space index builders entirely.
                continue
            dm["items"] = "{{ genomes }}"
            dm["data_table_reload"] = tables
            out_conf["data_managers"].append(dm)

    # Read in the genome file and pass its genome list through unchanged.
    with open(args.genome_file, "r") as fh:
        genomes = yaml.safe_load(fh)
    out_conf["genomes"] = genomes["genomes"]

    with open(args.outfile, "w") as out:
        yaml.dump(out_conf, out, default_flow_style=False)
97 |
98 |
99 | if __name__ == "__main__":
100 | main()
101 |
--------------------------------------------------------------------------------
/scripts/make_fetch.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python
2 |
3 | import yaml
4 | import xml.etree.ElementTree as ET
5 | import re
6 | import argparse
7 |
8 |
def main():
    """Generate an ephemeris ``run-data-managers`` config (fetch.yml).

    Finds the installed genome-fetch data manager tool id in
    ``shed_data_manager_conf.xml``, then emits one run entry per genome in
    the genomes YAML, with source-specific parameters (UCSC dbkey, NCBI
    accession, or download URL).

    Command line arguments:
        -g/--genome_file: genomes .yml file (required)
        -x/--shed_data_managers_conf_file: shed_data_manager_conf.xml (required)
        -o/--outfile: output path (default: fetch.yml)
    """
    VERSION = 0.2

    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-g", "--genome_file", required=True, help="The genome yaml file to read."
    )
    parser.add_argument(
        "-x",
        "--shed_data_managers_conf_file",
        required=True,
        help="Path to the shed_data_managers_conf.xml file",
    )
    parser.add_argument(
        "-o",
        "--outfile",
        default="fetch.yml",
        help="The name of the output file to produce.",
    )
    parser.add_argument("--version", action="store_true")
    parser.add_argument("--verbose", action="store_true")

    args = parser.parse_args()

    if args.version:
        print("make_fetch.py version: %.1f" % VERSION)
        return

    # Locate the tool id of the fetch data manager (last match wins, as before).
    tree = ET.parse(args.shed_data_managers_conf_file)
    root = tree.getroot()
    fetch_dm_tool = ""
    for data_manager in root:
        for tool in data_manager:
            if tool.tag == "tool":
                for x in tool:
                    if x.tag == "id" and re.search(r"fetch_genome", x.text):
                        fetch_dm_tool = x.text

    # Read in the genome file.
    with open(args.genome_file, "r") as fh:
        genomes = yaml.safe_load(fh)

    dm = {"data_managers": []}

    for genome in genomes["genomes"]:
        # One fetch run per genome; parameters depend on the source kind.
        out = {"id": fetch_dm_tool, "params": [{"dbkey_source|dbkey": genome["id"]}]}
        if genome["source"] == "ucsc":
            out["params"].append({"reference_source|reference_source_selector": "ucsc"})
            out["params"].append({"reference_source|requested_dbkey": genome["id"]})
        elif re.match("^[A-Z_]+[0-9.]+", genome["source"]):
            # Looks like an NCBI accession, e.g. NC_021902.
            out["params"].append({"dbkey_source|dbkey_source_selector": "new"})
            out["params"].append({"reference_source|reference_source_selector": "ncbi"})
            out["params"].append(
                {"reference_source|requested_identifier": genome["source"]}
            )
            out["params"].append({"sequence_name": genome["description"]})
            # BUG FIX: the fetch data manager's parameter is 'sequence_id'
            # (underscore), not 'sequence.id' — matches make_dm_genomes.py.
            out["params"].append({"sequence_id": genome["id"]})
        elif re.match("^http", genome["source"]):
            out["params"].append({"dbkey_source|dbkey_source_selector": "new"})
            out["params"].append({"reference_source|reference_source_selector": "url"})
            out["params"].append({"reference_source|user_url": genome["source"]})
            out["params"].append({"sequence_name": genome["description"]})
            # BUG FIX: see above — 'sequence_id', not 'sequence.id'.
            out["params"].append({"sequence_id": genome["id"]})
        out["data_table_reload"] = ["all_fasta", "__dbkeys__"]

        dm["data_managers"].append(out)

    with open(args.outfile, "w") as out:
        yaml.dump(dm, out, default_flow_style=False)
83 |
84 |
85 | if __name__ == "__main__":
86 | main()
87 |
--------------------------------------------------------------------------------