├── .ci ├── cvmfs-fuse.conf ├── get-bundle-url.py ├── jenkins.sh └── repos.conf ├── .gitignore ├── LICENSE ├── README.md ├── ansible ├── .pass ├── ansible.cfg ├── files │ └── idc │ │ ├── bootstrap.sh │ │ ├── data_manager_conf.xml │ │ └── workflow_schedulers_conf.xml ├── group_vars │ ├── idc_builders.yaml │ └── js2.yaml ├── inventory.yaml ├── playbook-launch.yaml ├── playbook-teardown.yaml ├── requirements.txt ├── requirements.yaml ├── roles │ └── openstack │ │ └── tasks │ │ ├── clean.yml │ │ ├── main.yml │ │ ├── secrets.yml │ │ └── spawn.yml └── templates │ ├── idc │ ├── object_store_conf.xml.j2 │ └── tpv │ │ └── idc.yaml.j2 │ └── nginx │ └── idc-build.j2 ├── config └── tool_data_table_conf.xml ├── cvmfs_current_managed ├── cvmfs_data_managers.yml └── cvmfs_managed_genomes.yml ├── data_managers.yml ├── genomes.yml ├── run_builder.sh └── scripts ├── cleanup.sh ├── job_conf.xml ├── make_dm_genomes.py └── make_fetch.py /.ci/cvmfs-fuse.conf: -------------------------------------------------------------------------------- 1 | CVMFS_CACHE_BASE=${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache 2 | CVMFS_RELOAD_SOCKETS=${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache 3 | CVMFS_USYSLOG=${WORKSPACE}/${BUILD_NUMBER}/cvmfs.log 4 | CVMFS_CLAIM_OWNERSHIP=yes 5 | CVMFS_SERVER_URL="http://cvmfs1-psu0.galaxyproject.org/cvmfs/@fqrn@;http://cvmfs1-iu0.galaxyproject.org/cvmfs/@fqrn@;http://cvmfs1-tacc0.galaxyproject.org/cvmfs/@fqrn@" 6 | CVMFS_KEYS_DIR=/etc/cvmfs/keys/galaxyproject.org 7 | CVMFS_HTTP_PROXY=DIRECT 8 | # this is critical for release managers (and thus for our faux-release-manager) because ext4's xattr space is too small 9 | # to copy the cvmfs client's xattrs, yielding ENOSPC (No space left on device) for any copy-on-writes 10 | CVMFS_HIDE_MAGIC_XATTRS=yes 11 | -------------------------------------------------------------------------------- /.ci/get-bundle-url.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os 4 | 5 | import requests 6 | from bioblend.galaxy import GalaxyInstance 7 | 8 | 9 | EXT = 'data_manager_json' 10 | 11 | parser = argparse.ArgumentParser(description="") 12 | parser.add_argument( 13 | "-g", "--galaxy-url", default="http://localhost:8080", help="The Galaxy server URL" 14 | ) 15 | parser.add_argument( 16 | "-u", "--galaxy-user", default="idc@galaxyproject.org", help="Galaxy user email" 17 | ) 18 | parser.add_argument( 19 | "-p", "--galaxy-password", help="Galaxy user password (or set $IDC_USER_PASS)" 20 | ) 21 | parser.add_argument( 22 | "-a", "--galaxy-api-key", help="Galaxy API key (or set $EPHEMERIS_API_KEY)" 23 | ) 24 | parser.add_argument( 25 | "-n", "--history-name", default="Data Manager History (automatically created)", help="History name" 26 | ) 27 | parser.add_argument( 28 | "-r", "--record-file", help="Record file" 29 | ) 30 | args = parser.parse_args() 31 | 32 | api_key = args.galaxy_api_key or os.environ.get("EPHEMERIS_API_KEY") 33 | password = args.galaxy_password or os.environ.get("IDC_USER_PASS") 34 | if api_key: 35 | auth_kwargs = {"key": api_key} 36 | elif password: 37 | auth_kwargs = {"email": args.galaxy_user, "password": password} 38 | else: 39 | raise RuntimeError("No Galaxy credentials supplied") 40 | 41 | gi = GalaxyInstance(url=args.galaxy_url, **auth_kwargs) 42 | 43 | history = gi.histories.get_histories(name=args.history_name, deleted=False)[0] 44 | history_id = history['id'] 45 | datasets = gi.datasets.get_datasets( 46 | history_id=history_id, extension=EXT, 
order="create_time-dsc" 47 | ) 48 | dataset_id = datasets[0]['id'] 49 | 50 | bundle_url = f"{args.galaxy_url}/api/datasets/{dataset_id}/display?to_ext={EXT}" 51 | 52 | if args.record_file: 53 | with open(args.record_file, "w") as fh: 54 | fh.write(f"galaxy_url: {args.galaxy_url}\n") 55 | fh.write(f"history_id: {history_id}\n") 56 | fh.write(f"history_url: {args.galaxy_url}/{history['url']}\n") 57 | fh.write(f"bundle_dataset_id: {dataset_id}\n") 58 | fh.write(f"bundle_dataset_url: {bundle_url}\n") 59 | 60 | print(bundle_url) 61 | -------------------------------------------------------------------------------- /.ci/jenkins.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # Set this variable to 'true' to publish on successful installation 5 | : ${PUBLISH:=false} 6 | 7 | BUILD_GALAXY_URL="http://idc-build" 8 | PUBLISH_GALAXY_URL="https://usegalaxy.org" 9 | SSH_MASTER_SOCKET_DIR="${HOME}/.cache/idc" 10 | MAIN_BRANCH='main' 11 | 12 | # Set to 'centos:...' or 'rockylinux:...' and set GALAXY_GIT_* or GALAXY_SERVER_DIR below to use a clone 13 | IMPORT_DOCKER_IMAGE='rockylinux:8' 14 | # Disable if using a locally built image e.g. for debugging 15 | IMPORT_DOCKER_IMAGE_PULL=true 16 | 17 | #GALAXY_TEMPLATE_DB_URL='https://raw.githubusercontent.com/davebx/galaxyproject-sqlite/master/20.01.sqlite' 18 | #GALAXY_TEMPLATE_DB="${GALAXY_TEMPLATE_DB_URL##*/}" 19 | # Unset to use create_db.py, which is fast now that it doesn't migrate new DBs 20 | GALAXY_TEMPLATE_DB_URL= 21 | GALAXY_TEMPLATE_DB='galaxy.sqlite' 22 | 23 | EPHEMERIS="git+https://github.com/mvdbeek/ephemeris.git@dm_parameters#egg_name=ephemeris" 24 | GALAXY_MAINTENANCE_SCRIPTS="git+https://github.com/mvdbeek/galaxy-maintenance-scripts.git@avoid_galaxy_app#egg_name=galaxy-maintenance-scripts" 25 | 26 | # Should be set by Jenkins, so the default here is for development 27 | : ${GIT_COMMIT:=$(git rev-parse HEAD)} 28 | 29 | # Set to true to perform everything on the Jenkins worker and copy results to the Stratum 0 for publish, instead of 30 | # performing everything directly on the Stratum 0. Requires preinstallation/preconfiguration of CVMFS and for 31 | # fuse-overlayfs to be installed on Jenkins workers. 
32 | USE_LOCAL_OVERLAYFS=false 33 | 34 | # Set to true to run the importer in a docker container 35 | USE_DOCKER="$USE_LOCAL_OVERLAYFS" 36 | 37 | REMOTE_PYTHON=/opt/rh/rh-python38/root/usr/bin/python3 38 | REMOTE_WORKDIR_PARENT=/srv/idc 39 | 40 | # $EPHEMERIS_API_KEY and $IDC_VAULT_PASS should be set in the environment 41 | 42 | # 43 | # Development/debug options 44 | # 45 | 46 | # 47 | # Ensure that everything is defined for set -u 48 | # 49 | 50 | DM_STAGE=0 51 | TOOL_YAMLS=() 52 | REPO_USER= 53 | REPO_STRATUM0= 54 | SSH_MASTER_SOCKET= 55 | WORKDIR= 56 | REMOTE_WORKDIR= 57 | USER_UID="$(id -u)" 58 | USER_GID="$(id -g)" 59 | OVERLAYFS_UPPER= 60 | OVERLAYFS_LOWER= 61 | OVERLAYFS_WORK= 62 | OVERLAYFS_MOUNT= 63 | EPHEMERIS_BIN= 64 | GALAXY_MAINTENANCE_SCRIPTS_BIN= 65 | 66 | SSH_MASTER_UP=false 67 | CVMFS_TRANSACTION_UP=false 68 | IMPORT_CONTAINER_UP=false 69 | LOCAL_CVMFS_MOUNTED=false 70 | LOCAL_OVERLAYFS_MOUNTED=false 71 | BUILD_GALAXY_UP=false 72 | 73 | 74 | function trap_handler() { 75 | { set +x; } 2>/dev/null 76 | # return to original dir 77 | while popd 2>/dev/null; do :; done || true 78 | $IMPORT_CONTAINER_UP && stop_import_container 79 | clean_preconfigured_container 80 | $LOCAL_CVMFS_MOUNTED && unmount_overlay 81 | # $LOCAL_OVERLAYFS_MOUNTED does not need to be checked here since if it's true, $LOCAL_CVMFS_MOUNTED must be true 82 | $CVMFS_TRANSACTION_UP && abort_transaction 83 | $BUILD_GALAXY_UP && stop_build_galaxy 84 | clean_workspace 85 | [ -n "$WORKSPACE" ] && log_exec rm -rf "$WORKSPACE" 86 | $SSH_MASTER_UP && [ -n "$REMOTE_WORKDIR" ] && exec_on rm -rf "$REMOTE_WORKDIR" 87 | $SSH_MASTER_UP && stop_ssh_control 88 | return 0 89 | } 90 | trap "trap_handler" SIGTERM SIGINT ERR EXIT 91 | 92 | 93 | function log() { 94 | [ -t 0 ] && echo -e '\033[1;32m#' "$@" '\033[0m' || echo '#' "$@" 95 | } 96 | 97 | 98 | function log_error() { 99 | [ -t 0 ] && echo -e '\033[0;31mERROR:' "$@" '\033[0m' || echo 'ERROR:' "$@" 100 | } 101 | 102 | 103 | function log_debug() { 104 | echo "####" "$@" 105 | } 106 | 107 | 108 | function log_exec() { 109 | local rc 110 | if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then 111 | set -x 112 | eval "$@" 113 | else 114 | set -x 115 | "$@" 116 | fi 117 | { rc=$?; set +x; } 2>/dev/null 118 | return $rc 119 | } 120 | 121 | 122 | function log_exit_error() { 123 | log_error "$@" 124 | exit 1 125 | } 126 | 127 | 128 | function log_exit() { 129 | echo "$@" 130 | exit 0 131 | } 132 | 133 | 134 | function exec_on() { 135 | if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then 136 | log_exec "$@" 137 | else 138 | log_exec ssh -S "$SSH_MASTER_SOCKET" -l "$REPO_USER" "$REPO_STRATUM0" -- "$@" 139 | fi 140 | } 141 | 142 | 143 | function copy_to() { 144 | local file="$1" 145 | if $USE_LOCAL_OVERLAYFS && ! $SSH_MASTER_UP; then 146 | log_exec cp "$file" "${WORKDIR}/${file##*}" 147 | else 148 | log_exec scp -o "ControlPath=$SSH_MASTER_SOCKET" "$file" "${REPO_USER}@${REPO_STRATUM0}:${REMOTE_WORKDIR}/${file##*/}" 149 | fi 150 | } 151 | 152 | 153 | function check_bot_command() { 154 | log 'Checking for Github PR Bot commands' 155 | log_debug "Value of \$ghprbCommentBody is: ${ghprbCommentBody:-UNSET}" 156 | case "${ghprbCommentBody:-UNSET}" in 157 | "@galaxybot deploy"*) 158 | PUBLISH=true 159 | ;; 160 | esac 161 | if $PUBLISH; then 162 | log "Publish requested; running build and import" 163 | else 164 | log "Publish not requested, exiting" 165 | exit 0 166 | fi 167 | } 168 | 169 | 170 | function load_repo_configs() { 171 | log 'Loading repository configs' 172 | . 
./.ci/repos.conf 173 | } 174 | 175 | 176 | function detect_changes() { 177 | REPO=idc 178 | 179 | log "Getting repo for: ${REPO}" 180 | REPO="${REPOS[$REPO]}" 181 | declare -p REPO 182 | } 183 | 184 | 185 | function set_repo_vars() { 186 | REPO_USER="${REPO_USERS[$REPO]}" 187 | REPO_STRATUM0="${REPO_STRATUM0S[$REPO]}" 188 | CONTAINER_NAME="idc-${REPO_USER}-${BUILD_NUMBER}" 189 | if $USE_LOCAL_OVERLAYFS; then 190 | OVERLAYFS_LOWER="${WORKSPACE}/${BUILD_NUMBER}/lower" 191 | OVERLAYFS_UPPER="${WORKSPACE}/${BUILD_NUMBER}/upper" 192 | OVERLAYFS_WORK="${WORKSPACE}/${BUILD_NUMBER}/work" 193 | OVERLAYFS_MOUNT="${WORKSPACE}/${BUILD_NUMBER}/mount" 194 | CVMFS_CACHE="${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache" 195 | else 196 | OVERLAYFS_UPPER="/var/spool/cvmfs/${REPO}/scratch/current" 197 | OVERLAYFS_LOWER="/var/spool/cvmfs/${REPO}/rdonly" 198 | OVERLAYFS_MOUNT="/cvmfs/${REPO}" 199 | fi 200 | } 201 | 202 | 203 | function setup_ansible() { 204 | log "Setting up Ansible" 205 | log_exec python3 -m venv ansible-venv 206 | . ./ansible-venv/bin/activate 207 | log_exec pip install --upgrade pip wheel 208 | pushd ansible 209 | log_exec pip install -r requirements.txt 210 | log_exec ansible-galaxy role install -p roles -r requirements.yaml 211 | log_exec ansible-galaxy collection install -p collections -r requirements.yaml 212 | popd 213 | deactivate 214 | } 215 | 216 | 217 | function setup_ephemeris() { 218 | # Sets global $EPHEMERIS_BIN 219 | EPHEMERIS_BIN="$(pwd)/ephemeris/bin" 220 | log "Setting up Ephemeris" 221 | log_exec python3 -m venv ephemeris 222 | log_exec "${EPHEMERIS_BIN}/pip" install --upgrade pip wheel 223 | log_exec "${EPHEMERIS_BIN}/pip" install --index-url https://wheels.galaxyproject.org/simple/ \ 224 | --extra-index-url https://pypi.org/simple/ "${EPHEMERIS:=ephemeris}" 225 | } 226 | 227 | 228 | function setup_remote_ephemeris() { 229 | # Sets global $EPHEMERIS_BIN 230 | EPHEMERIS_BIN="${REMOTE_WORKDIR}/ephemeris/bin" 231 | log "Setting up remote Ephemeris" 232 | exec_on "$REMOTE_PYTHON" -m venv "${REMOTE_WORKDIR}/ephemeris" 233 | exec_on "${EPHEMERIS_BIN}/pip" install --upgrade pip wheel 234 | # urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'OpenSSL 1.0.2k-fips 26 Jan 2017'. 
See: https://github.com/urllib3/urllib3/issues/2168 235 | exec_on "${EPHEMERIS_BIN}/pip" install --index-url https://wheels.galaxyproject.org/simple/ \ 236 | --extra-index-url https://pypi.org/simple/ "${BIOBLEND:=bioblend}" "${EPHEMERIS:=ephemeris}" "'urllib3<2'" 237 | } 238 | 239 | 240 | function setup_galaxy_maintenance_scripts() { 241 | # Sets global $GALAXY_MAINTENANCE_SCRIPTS 242 | local venv="${1:-.}/galaxy-maintenance-scripts" 243 | local python="${2:-python3}" 244 | GALAXY_MAINTENANCE_SCRIPTS_BIN="${venv}/bin" 245 | log "Setting up Galaxy Maintenance Scripts" 246 | exec_on "$python" -m venv "$venv" 247 | exec_on "${venv}/bin/pip" install --upgrade pip wheel 248 | exec_on "${venv}/bin/pip" install --index-url https://wheels.galaxyproject.org/simple/ \ 249 | --extra-index-url https://pypi.org/simple/ "$GALAXY_MAINTENANCE_SCRIPTS" "'urllib3<2'" 250 | } 251 | 252 | 253 | function verify_cvmfs_revision() { 254 | log "Verifying that CVMFS Client and Stratum 0 are in sync" 255 | local cvmfs_io_sock="${WORKSPACE}/${BUILD_NUMBER}/cvmfs-cache/${REPO}/cvmfs_io.${REPO}" 256 | local stratum0_published_url="http://${REPO_STRATUM0}/cvmfs/${REPO}/.cvmfspublished" 257 | local client_rev=$(cvmfs_talk -p "$cvmfs_io_sock" revision) 258 | local stratum0_rev=$(curl -s "$stratum0_published_url" | awk -F '^--$' '{print $1} NF>1{exit}' | grep '^S' | sed 's/^S//') 259 | if [ -z "$client_rev" ]; then 260 | log_exit_error "Failed to detect client revision" 261 | elif [ -z "$stratum0_rev" ]; then 262 | log_exit_error "Failed to detect Stratum 0 revision" 263 | elif [ "$client_rev" -ne "$stratum0_rev" ]; then 264 | log_exit_error "Importer client revision '${client_rev}' does not match Stratum 0 revision '${stratum0_rev}'" 265 | fi 266 | 267 | log "${REPO} is revision ${client_rev}" 268 | } 269 | 270 | 271 | function mount_overlay() { 272 | log "Mounting OverlayFS/CVMFS" 273 | log_debug "\$JOB_NAME: ${JOB_NAME}, \$WORKSPACE: ${WORKSPACE}, \$BUILD_NUMBER: ${BUILD_NUMBER}" 274 | log_exec mkdir -p "$OVERLAYFS_LOWER" "$OVERLAYFS_UPPER" "$OVERLAYFS_WORK" "$OVERLAYFS_MOUNT" "$CVMFS_CACHE" 275 | log_exec cvmfs2 -o config=.ci/cvmfs-fuse.conf,allow_root "$REPO" "$OVERLAYFS_LOWER" 276 | LOCAL_CVMFS_MOUNTED=true 277 | verify_cvmfs_revision 278 | log_exec fuse-overlayfs \ 279 | -o "lowerdir=${OVERLAYFS_LOWER},upperdir=${OVERLAYFS_UPPER},workdir=${OVERLAYFS_WORK},allow_root" \ 280 | "$OVERLAYFS_MOUNT" 281 | LOCAL_OVERLAYFS_MOUNTED=true 282 | } 283 | 284 | 285 | function unmount_overlay() { 286 | log "Unmounting OverlayFS/CVMFS" 287 | if $LOCAL_OVERLAYFS_MOUNTED; then 288 | log_exec fusermount -u "$OVERLAYFS_MOUNT" 289 | LOCAL_OVERLAYFS_MOUNTED=false 290 | fi 291 | # DEBUG: what is holding this? 
292 | log_exec fuser -v "$OVERLAYFS_LOWER" || true 293 | # Attempt to kill anything still accessing lower so unmount doesn't fail 294 | log_exec fuser -v -k "$OVERLAYFS_LOWER" || true 295 | log_exec fusermount -u "$OVERLAYFS_LOWER" 296 | LOCAL_CVMFS_MOUNTED=false 297 | } 298 | 299 | 300 | function start_ssh_control() { 301 | log "Starting SSH control connection to Stratum 0" 302 | SSH_MASTER_SOCKET="${SSH_MASTER_SOCKET_DIR}/ssh-tunnel-${REPO_USER}-${REPO_STRATUM0}.sock" 303 | log_exec mkdir -p "$SSH_MASTER_SOCKET_DIR" 304 | log_exec ssh -M -S "$SSH_MASTER_SOCKET" -Nfn -l "$REPO_USER" "$REPO_STRATUM0" 305 | USER_UID=$(exec_on id -u) 306 | USER_GID=$(exec_on id -g) 307 | SSH_MASTER_UP=true 308 | } 309 | 310 | 311 | function stop_ssh_control() { 312 | log "Stopping SSH control connection to Stratum 0" 313 | log_exec ssh -S "$SSH_MASTER_SOCKET" -O exit -l "$REPO_USER" "$REPO_STRATUM0" 314 | rm -f "$SSH_MASTER_SOCKET" 315 | SSH_MASTER_UP=false 316 | } 317 | 318 | 319 | function begin_transaction() { 320 | # $1 >= 0 number of seconds to retry opening transaction for 321 | local max_wait="${1:--1}" 322 | local start=$(date +%s) 323 | local elapsed='-1' 324 | local sleep='4' 325 | local max_sleep='60' 326 | log "Opening transaction on $REPO" 327 | while ! exec_on cvmfs_server transaction "$REPO"; do 328 | log "Failed to open CVMFS transaction on ${REPO}" 329 | if [ "$max_wait" -eq -1 ]; then 330 | log_exit_error 'Transaction open retry disabled, giving up!' 331 | elif [ "$elapsed" -ge "$max_wait" ]; then 332 | log_exit_error "Time waited (${elapsed}s) exceeds limit (${max_wait}s), giving up!" 333 | fi 334 | log "Will retry in ${sleep}s" 335 | sleep $sleep 336 | [ $sleep -ne $max_sleep ] && let sleep="${sleep}*2" 337 | [ $sleep -gt $max_sleep ] && sleep="$max_sleep" 338 | let elapsed="$(date +%s)-${start}" 339 | done 340 | CVMFS_TRANSACTION_UP=true 341 | } 342 | 343 | 344 | function abort_transaction() { 345 | log "Aborting transaction on $REPO" 346 | exec_on cvmfs_server abort -f "$REPO" 347 | CVMFS_TRANSACTION_UP=false 348 | } 349 | 350 | 351 | function publish_transaction() { 352 | log "Publishing transaction on $REPO" 353 | exec_on "cvmfs_server publish -a 'idc-${GIT_COMMIT:0:7}.${DM_STAGE}' -m 'Automated data installation for commit ${GIT_COMMIT}' ${REPO}" 354 | CVMFS_TRANSACTION_UP=false 355 | } 356 | 357 | 358 | function create_workdir() { 359 | # Sets global $WORKDIR 360 | log "Creating local workdir" 361 | WORKDIR=$(log_exec mktemp -d -t idc.work.XXXXXX) 362 | } 363 | 364 | 365 | function create_remote_workdir() { 366 | # Sets global $REMOTE_WORKDIR 367 | log "Creating remote workdir" 368 | REMOTE_WORKDIR=$(exec_on mktemp -d -p "$REMOTE_WORKDIR_PARENT" -t idc.work.XXXXXX) 369 | } 370 | 371 | 372 | function prep_docker_image() { 373 | if $USE_DOCKER && $IMPORT_DOCKER_IMAGE_PULL; then 374 | log "Fetching latest Galaxy image" 375 | exec_on docker pull "$IMPORT_DOCKER_IMAGE" 376 | fi 377 | } 378 | 379 | 380 | function run_build_galaxy() { 381 | setup_ansible 382 | log "Starting Build Galaxy" 383 | # This is set beforehand so that the teardown playbook will destroy the instance if launch fails partway through 384 | BUILD_GALAXY_UP=true 385 | . 
./ansible-venv/bin/activate 386 | pushd ansible 387 | log_exec ansible-playbook playbook-launch.yaml 388 | popd 389 | deactivate 390 | wait_for_cvmfs_sync 391 | } 392 | 393 | 394 | function wait_for_cvmfs_sync() { 395 | # TODO merge with verify_cvmfs_revision() used by build side 396 | # TODO: could avoid the hardcoding by using ansible but the output is harder to process 397 | local stratum0_published_url="http://${REPO_STRATUM0}/cvmfs/${REPO}/.cvmfspublished" 398 | while true; do 399 | # ensure it's mounted 400 | ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -l rocky -i ~/.ssh/id_rsa_idc_jetstream2_cvmfs idc-build ls /cvmfs/${REPO} >/dev/null 401 | local client_rev=$(ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -l rocky -i ~/.ssh/id_rsa_idc_jetstream2_cvmfs idc-build sudo cvmfs_talk -i ${REPO} revision) 402 | local stratum0_rev=$(curl -s "$stratum0_published_url" | awk -F '^--$' '{print $1} NF>1{exit}' | grep '^S' | sed 's/^S//') 403 | if [ "$client_rev" -eq "$stratum0_rev" ]; then 404 | log "${REPO} is revision ${client_rev}" 405 | break 406 | else 407 | log_debug "Builder client revision '${client_rev}' does not match Stratum 0 revision '${stratum0_rev}'" 408 | sleep 60 409 | fi 410 | done 411 | } 412 | 413 | 414 | function wait_for_build_galaxy() { 415 | log "Waiting for Galaxy" 416 | log_exec "${EPHEMERIS_BIN}/galaxy-wait" -v -g "$BUILD_GALAXY_URL" --timeout 180 || { 417 | log_error "Timed out waiting for Galaxy" 418 | #exec_on journalctl -u galaxy-gunicorn 419 | #log_debug "response from ${IMPORT_GALAXY_URL}"; 420 | curl -s "$BUILD_GALAXY_URL"; 421 | log_exit_error "Terminating build due to previous errors" 422 | } 423 | } 424 | 425 | 426 | function stop_build_galaxy() { 427 | . ./ansible-venv/bin/activate 428 | log "Stopping Build Galaxy" 429 | pushd ansible 430 | log_exec ansible-playbook playbook-teardown.yaml 431 | BUILD_GALAXY_UP=false 432 | popd 433 | deactivate 434 | } 435 | 436 | 437 | function install_data_managers() { 438 | log "Generating Data Manager tool list" 439 | log_exec _idc-data-managers-to-tools 440 | log "Installing Data Managers" 441 | log_exec shed-tools install -t tools.yml -g "$BUILD_GALAXY_URL" 442 | } 443 | 444 | 445 | function generate_data_manager_tasks() { 446 | # returns false if there are no data managers to run 447 | log "Generating Data Manager tasks" 448 | log_exec "${EPHEMERIS_BIN}/_idc-split-data-manager-genomes" -g "$PUBLISH_GALAXY_URL" --tool-id-mode short 449 | compgen -G "data_manager_tasks/*/data_manager_*/run_data_managers.yaml" >/dev/null 450 | } 451 | 452 | 453 | function run_data_managers() { 454 | # TODO: eventually these will specify their stage somehow 455 | compgen -G "data_manager_tasks/*/data_manager_fetch_genome_dbkeys_all_fasta/run_data_managers.yaml" >/dev/null && { 456 | run_stage0_data_managers 457 | } || { 458 | compgen -G "data_manager_tasks/*/data_manager_*/run_data_managers.yaml" >/dev/null && { 459 | run_stage1_data_managers 460 | } 461 | } 462 | } 463 | 464 | 465 | function run_stage0_data_managers() { 466 | local dm_config a 467 | log "Running Stage 0 Data Managers" 468 | DM_STAGE=0 469 | pushd data_manager_tasks 470 | for dm_config in */data_manager_fetch_genome_dbkeys_all_fasta/run_data_managers.yaml; do 471 | readarray -td/ a <<<"$dm_config" 472 | run_data_manager "${a[0]}" "${a[1]}" "$dm_config" 473 | done 474 | popd 475 | } 476 | 477 | 478 | function run_stage1_data_managers() { 479 | local dm_config a record 480 | log "Running Stage 1 Data Managers" 481 | DM_STAGE=1 482 | pushd 
data_manager_tasks 483 | for dm_config in */*/run_data_managers.yaml; do 484 | readarray -td/ a <<<"$dm_config" 485 | # this should never be false since we run either/or stage 0 or stage 1 in the caller 486 | [ "${a[1]}" != 'data_manager_fetch_genome_dbkeys_all_fasta' ] || continue 487 | run_data_manager "${a[0]}" "${a[1]}" "$dm_config" 488 | done 489 | popd 490 | } 491 | 492 | 493 | function run_data_manager() { 494 | local build_id="$1" 495 | local dm_repo_id="$2" 496 | local dm_config="$3" 497 | log "Running Data Manager '$dm_repo_id' for build '$build_id'" 498 | log_exec "${EPHEMERIS_BIN}/run-data-managers" --config "$dm_config" -g "$BUILD_GALAXY_URL" --data-manager-mode bundle --history-name "idc-${build_id}-${dm_repo_id}" 499 | } 500 | 501 | 502 | function run_container_for_preconfigure() { 503 | # Sets globals $PRECONFIGURE_CONTAINER_NAME $PRECONFIGURED_IMAGE_NAME 504 | PRECONFIGURE_CONTAINER_NAME="${CONTAINER_NAME}-preconfigure" 505 | PRECONFIGURED_IMAGE_NAME="${PRECONFIGURE_CONTAINER_NAME}d" 506 | ORIGINAL_IMAGE_NAME="$IMPORT_DOCKER_IMAGE" 507 | log "Starting import container for preconfiguration" 508 | exec_on docker run -d --name="$PRECONFIGURE_CONTAINER_NAME" \ 509 | -v "${WORKDIR}/:/work/" \ 510 | "$IMPORT_DOCKER_IMAGE" sleep infinity 511 | IMPORT_CONTAINER_UP=true 512 | } 513 | 514 | 515 | function commit_preconfigured_container() { 516 | log "Stopping and committing preconfigured container" 517 | exec_on docker kill "$PRECONFIGURE_CONTAINER_NAME" 518 | IMPORT_CONTAINER_UP=false 519 | exec_on docker commit "$PRECONFIGURE_CONTAINER_NAME" "$PRECONFIGURED_IMAGE_NAME" 520 | IMPORT_DOCKER_IMAGE="$PRECONFIGURED_IMAGE_NAME" 521 | } 522 | 523 | 524 | function clean_preconfigured_container() { 525 | [ -n "${PRECONFIGURED_IMAGE_NAME:-}" ] || return 0 526 | exec_on docker kill "$PRECONFIGURE_CONTAINER_NAME" || true 527 | exec_on docker rm -v "$PRECONFIGURE_CONTAINER_NAME" || true 528 | exec_on docker rmi -f "$PRECONFIGURED_IMAGE_NAME" || true 529 | } 530 | 531 | 532 | function generate_import_tasks() { 533 | # returns false if there is no data manager to import 534 | log "Generating import tasks" 535 | copy_to genomes.yml 536 | copy_to data_managers.yml 537 | exec_on "${EPHEMERIS_BIN}/_idc-split-data-manager-genomes" --complete-check-cvmfs "--cvmfs-root=${OVERLAYFS_LOWER}" "--merged-genomes-path=${REMOTE_WORKDIR}/genomes.yml" "--data-managers-path=${REMOTE_WORKDIR}/data_managers.yml" "--split-genomes-path=${REMOTE_WORKDIR}/import_tasks" 538 | exec_on "compgen -G '${REMOTE_WORKDIR}/import_tasks/*/data_manager_*/run_data_managers.yaml'" >/dev/null 539 | } 540 | 541 | 542 | function update_tool_data_table_conf() { 543 | # update tool_data_table_conf.xml from repo 544 | copy_to config/tool_data_table_conf.xml 545 | exec_on diff -q "${REMOTE_WORKDIR}/tool_data_table_conf.xml" "/cvmfs/${REPO}/config/tool_data_table_conf.xml" || { exec_on mkdir -p "${OVERLAYFS_MOUNT}/config" && exec_on cp "${REMOTE_WORKDIR}/tool_data_table_conf.xml" "${OVERLAYFS_MOUNT}/config/tool_data_table_conf.xml"; } 546 | } 547 | 548 | function run_import_container() { 549 | run_container_for_preconfigure 550 | log "Installing importer scripts" 551 | exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" yum install -y python39 git 552 | exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" pip3 install --upgrade pip wheel setuptools 553 | exec_on docker exec "$PRECONFIGURE_CONTAINER_NAME" /usr/local/bin/pip install "$GALAXY_MAINTENANCE_SCRIPTS" 554 | commit_preconfigured_container 555 | 556 | log "Starting importer container" 
557 | exec_on docker run -d --user "${USER_UID}:${USER_GID}" --name="${CONTAINER_NAME}" \ 558 | -v "${OVERLAYFS_MOUNT}:/cvmfs/${REPO}" \ 559 | "$IMPORT_DOCKER_IMAGE" sleep infinity 560 | IMPORT_CONTAINER_UP=true 561 | } 562 | 563 | 564 | function stop_import_container() { 565 | log "Stopping importer container" 566 | # NOTE: docker rm -f exits 1 if the container does not exist 567 | exec_on docker stop "$CONTAINER_NAME" || true # try graceful shutdown first 568 | exec_on docker kill "$CONTAINER_NAME" || true # probably failed to start, don't prevent the rest of cleanup 569 | exec_on docker rm -v "$CONTAINER_NAME" || true 570 | IMPORT_CONTAINER_UP=false 571 | } 572 | 573 | 574 | function import_tool_data_bundles() { 575 | local dm_config j build_id dm_repo_id bundle_uri record_file 576 | copy_to .ci/get-bundle-url.py 577 | for dm_config in $(exec_on "compgen -G '${REMOTE_WORKDIR}/import_tasks/*/data_manager_*/run_data_managers.yaml'"); do 578 | IFS='/' read build_id dm_repo_id j <<< "${dm_config##${REMOTE_WORKDIR}/import_tasks/}" 579 | record_file="${REMOTE_WORKDIR}/import_tasks/${build_id}/${dm_repo_id}/bundle.txt" 580 | log "Importing bundle for Data Manager '$dm_repo_id' of '$build_id'" 581 | # API key is filtered from output by Jenkins 582 | local bundle_uri="$(exec_on ${EPHEMERIS_BIN}/python3 ${REMOTE_WORKDIR}/get-bundle-url.py --galaxy-url "$PUBLISH_GALAXY_URL" --history-name "idc-${build_id}-${dm_repo_id}" --record-file="$record_file" --galaxy-api-key="$EPHEMERIS_API_KEY")" 583 | [ -n "$bundle_uri" ] || log_exit_error "Could not determine bundle URI!" 584 | log_debug "bundle URI is: $bundle_uri" 585 | if $USE_DOCKER; then 586 | exec_on docker exec "$CONTAINER_NAME" mkdir -p "/cvmfs/${REPO}/data" "/cvmfs/${REPO}/record/${build_id}" 587 | exec_on docker exec "$CONTAINER_NAME" /usr/local/bin/galaxy-import-data-bundle --tool-data-path "/cvmfs/${REPO}/data" --data-table-config-path "/cvmfs/${REPO}/config/tool_data_table_conf.xml" "$bundle_uri" 588 | exec_on rsync -av "import_tasks/${build_id}/${dm_repo_id}" "${OVERLAYFS_MOUNT}/record/${build_id}" 589 | else 590 | exec_on mkdir -p "/cvmfs/${REPO}/data" "/cvmfs/${REPO}/record/${build_id}" 591 | exec_on "TMPDIR=${REMOTE_WORKDIR}" "${GALAXY_MAINTENANCE_SCRIPTS_BIN}/galaxy-import-data-bundle" --tool-data-path "/cvmfs/${REPO}/data" --data-table-config-path "/cvmfs/${REPO}/config/tool_data_table_conf.xml" "$bundle_uri" 592 | exec_on rsync -av "${REMOTE_WORKDIR}/import_tasks/${build_id}/${dm_repo_id}" "${OVERLAYFS_MOUNT}/record/${build_id}" 593 | fi 594 | done 595 | } 596 | 597 | 598 | function show_logs() { 599 | local lines= 600 | if [ -n "${1:-}" ]; then 601 | lines="--tail ${1:-}" 602 | log_debug "tail ${lines} of server log"; 603 | else 604 | log_debug "contents of server log"; 605 | fi 606 | exec_on docker logs $lines "$CONTAINER_NAME" 607 | } 608 | 609 | 610 | function show_paths() { 611 | log "contents of OverlayFS upper mount (will be published)" 612 | exec_on tree "$OVERLAYFS_UPPER" 613 | } 614 | 615 | 616 | function check_for_repo_changes() { 617 | local lower= 618 | local changes=false 619 | log "Checking for changes to repo" 620 | show_paths 621 | for config in $(exec_on "compgen -G '${OVERLAYFS_UPPER}/config/*'"); do 622 | exec_on test -f "$config" || continue 623 | lower="${OVERLAYFS_LOWER}/config/${config##*/}" 624 | exec_on test -f "$lower" || lower=/dev/null 625 | exec_on diff -q "$lower" "$config" || { changes=true; exec_on diff -u "$lower" "$config" || true; } 626 | done 627 | if ! 
$changes; then 628 | log_exit_error "Terminating build: expected changes to ${OVERLAYFS_UPPER}/config/* not found!" 629 | fi 630 | } 631 | 632 | 633 | function clean_workspace() { 634 | log_exec rm -rf "${WORKSPACE}/${BUILD_NUMBER}" 635 | } 636 | 637 | 638 | function post_install() { 639 | log "Running post-installation tasks" 640 | exec_on "find '$OVERLAYFS_UPPER' -perm -u+r -not -perm -o+r -not -type l -print0 | xargs -0 --no-run-if-empty chmod go+r" 641 | exec_on "find '$OVERLAYFS_UPPER' -perm -u+rx -not -perm -o+rx -not -type l -print0 | xargs -0 --no-run-if-empty chmod go+rx" 642 | } 643 | 644 | 645 | function copy_upper_to_stratum0() { 646 | log "Copying changes to Stratum 0" 647 | set -x 648 | rsync -ah -e "ssh -o ControlPath=${SSH_MASTER_SOCKET}" --stats "${OVERLAYFS_UPPER}/" "${REPO_USER}@${REPO_STRATUM0}:/cvmfs/${REPO}" 649 | { rc=$?; set +x; } 2>/dev/null 650 | return $rc 651 | } 652 | 653 | 654 | function do_import_local() { 655 | mount_overlay 656 | # TODO: we could probably replace the import container with whatever cvmfsexec does to fake a mount 657 | if generate_import_tasks; then 658 | create_workdir 659 | prep_for_galaxy_run 660 | update_tool_data_table_conf 661 | run_import_container 662 | import_tool_data_bundles 663 | check_for_repo_changes 664 | stop_import_container 665 | clean_preconfigured_container 666 | post_install 667 | else 668 | log "Nothing to import" 669 | PUBLISH=false 670 | fi 671 | if $PUBLISH; then 672 | start_ssh_control 673 | begin_transaction 600 674 | copy_upper_to_stratum0 675 | publish_transaction 676 | stop_ssh_control 677 | fi 678 | unmount_overlay 679 | } 680 | 681 | 682 | function do_import_remote() { 683 | start_ssh_control 684 | create_remote_workdir 685 | setup_remote_ephemeris 686 | # from this point forward $EPHEMERIS_BIN refers to remote 687 | if generate_import_tasks; then 688 | setup_galaxy_maintenance_scripts "$WORKDIR" "$REMOTE_PYTHON" 689 | begin_transaction 690 | update_tool_data_table_conf 691 | import_tool_data_bundles 692 | check_for_repo_changes 693 | post_install 694 | else 695 | log "Nothing to import" 696 | PUBLISH=false 697 | fi 698 | $PUBLISH && publish_transaction || abort_transaction 699 | stop_ssh_control 700 | } 701 | 702 | 703 | function main() { 704 | check_bot_command 705 | load_repo_configs 706 | detect_changes 707 | set_repo_vars 708 | setup_ephemeris 709 | if generate_data_manager_tasks; then 710 | run_build_galaxy 711 | wait_for_build_galaxy 712 | #install_data_managers 713 | run_data_managers 714 | else 715 | log "Nothing to build, will check for unimported data" 716 | fi 717 | if $USE_LOCAL_OVERLAYFS; then 718 | do_import_local 719 | else 720 | do_import_remote 721 | fi 722 | stop_build_galaxy 723 | clean_workspace 724 | return 0 725 | } 726 | 727 | 728 | main 729 | -------------------------------------------------------------------------------- /.ci/repos.conf: -------------------------------------------------------------------------------- 1 | # source me 2 | 3 | # Map toolset dirs to repo names 4 | declare -g -A REPOS=( 5 | [sandbox]=sandbox.galaxyproject.org 6 | [idc]=idc.galaxyproject.org 7 | ) 8 | 9 | # Map repo names to stratum 0s 10 | declare -g -A REPO_STRATUM0S=( 11 | [sandbox.galaxyproject.org]=cvmfs0-psu0.galaxyproject.org 12 | [idc.galaxyproject.org]=cvmfs0-psu0.galaxyproject.org 13 | ) 14 | 15 | # Map repo names to owner on stratum 0s 16 | declare -g -A REPO_USERS=( 17 | [sandbox.galaxyproject.org]=sandbox 18 | [idc.galaxyproject.org]=idc 19 | ) 20 | 21 | # Print for debugging 22 | declare -p 
REPOS REPO_STRATUM0S REPO_USERS 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # Simon's working directory 104 | working/ 105 | 106 | # Ansible installed artifacts 107 | ansible/collections 108 | ansible/roles/*.* 109 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Björn Grüning 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IDC - Simon's Data Club 2 | 3 | In memory of our friend and reference data champion, [Simon Gladman](https://www.biocommons.org.au/news/simon-gladman). 4 | 5 | Formerly the Intergalactic (reference) Data Commission. 6 | 7 | The IDC is for Galaxy reference data what the [IUC](https://github.com/galaxyproject/tools-iuc) is for Galaxy tools: a project by the Galaxy Team and Community to produce, host, and distribute reference data for use in Galaxy servers. Community contributions and Pull Request reviews are encouraged! Details on how to contribute can be found below. 8 | 9 | ### Summary 10 | 11 | This repository is the entry point for contributing to the community-maintained CVMFS data repository, which hosts approximately 6 TB of public and open reference datasets. 12 | 13 | Ultimately, it is envisioned that the set of files contained here would be modified by adding either a new genomic data set specification or a new data manager. Acceptance of the subsequent Pull Request would then fetch the genomic data, build the appropriate indices, and upload everything to the proper position within the Galaxy project's CVMFS repositories. 14 | 15 | Comments and discussion on the approach and contributions are very welcome! 16 | 17 | Currently, the repository is geared to produce genomic indices for various tools using their data managers. The included `run_builder.sh` script will: 18 | 19 | 1. Create a virtualenv with the required software 20 | 2. Create a Docker Galaxy instance 21 | 3. Install the data manager tools listed in `data_managers_tools.yml` 22 | 4. Dynamically create an Ephemeris .yml config file from a list of genomes and their sources 23 | 5. Fetch the genomes from the appropriate sources and install them into Galaxy's `all_fasta` data table 24 | 6. Restart Galaxy to reload the `all_fasta` data table 25 | 7. Create the tool indices using Ephemeris and the `data_managers_genomes.yml` file 26 | 27 | The resulting genome files and tool indices will be located in the directory specified by the environment variables set at the top of the `run_builder.sh` script. 28 | 29 | The two important files are: 30 | 31 | * `data_managers.yml` 32 | * `genomes.yml` 33 | 34 | ### data_managers.yml 35 | 36 | This file lists the data managers that are to be installed into the target 37 | Galaxy used to build IDC data. 38 | 39 | ```yaml 40 | NAME_OF_THE_DATA_MANAGER: 41 | tool_id: TOOL_ID_IN_TARGET_REPO_OF_DATA_MANAGER 42 | tags: 43 | - tag # Tag can be either "genome" or "fetch_source". 44 | ``` 45 | 46 | Other data managers are added as elements in the `tools` yml array. The first tool listed should always be the `fetch_source` data manager. In most cases this will be the `data_manager_fetch_genome_dbkeys_all_fasta` data manager, which sources and downloads most genomes and populates the `all_fasta` and `__dbkeys__` data tables for later use by other data managers. 47 | 48 | Ephemeris can be used to generate a shed-tool install file to bootstrap the required tools 49 | and repositories into a target Galaxy for IDC installs.
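For illustration, a concrete `data_managers.yml` entry following the format above might look like the sketch below. The entry names and short tool ids here are assumptions for illustration only — verify them against the actual Tool Shed repositories being installed:

```yaml
# Hypothetical entries; confirm names and tool ids against the Tool Shed.
data_manager_fetch_genome_dbkeys_all_fasta:
  tool_id: data_manager_fetch_genome_all_fasta_dbkey  # assumed short tool id
  tags:
    - fetch_source
data_manager_bowtie2_index_builder:
  tool_id: bowtie2_index_builder_data_manager  # assumed short tool id
  tags:
    - genome
```

The Ephemeris commands that follow convert this file into a shed-tools install file and install the listed data managers into the target Galaxy: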
50 | 51 | ```bash 52 | pip install ephemeris 53 | _idc-data-managers-to-tools 54 | # defaults to: 55 | # _idc-data-managers-to-tools --data-managers-conf=data_managers.yml --shed-install-output-conf=tools.yml 56 | shed-tools install -t tools.yml 57 | ``` 58 | 59 | ### genomes.yml 60 | 61 | This file contains the list of genomes to be fetched and indexed. 62 | 63 | This file can hold more information than Galaxy can currently use, but its format has been specified with the future in mind. 64 | 65 | At this stage the file only needs to contain the `dbkey`, `description`, `id` and `source` fields. The rest currently serve as discussion points on the kind of information we would like to store with Galaxy to ensure provenance of the reference data used in analyses. 66 | 67 | Format: 68 | 69 | ```yaml 70 | genomes: 71 | - dbkey: #The dbkey of the data 72 | description: #The description of the data, including its taxonomy, version and date 73 | id: #The unique id of the data in Galaxy 74 | source: #The source of the data. Can be: 'ucsc', an NCBI accession number or a URL to a fasta file. 75 | doi: #Any DOI associated with the data 76 | version: #Any version information associated with the data 77 | checksum: #A SHA256 checksum of the original 78 | blob: #A blob for any other pertinent information 79 | indexers: #A list of tags for the types of data managers to be run on this data 80 | skiplist: # A list of data managers with the above-specified tag NOT to be run on this data 81 | 82 | ``` 83 | 84 | Example: 85 | 86 | ```yaml 87 | genomes: 88 | - dbkey: dm6 89 | description: D. melanogaster Aug. 2014 (BDGP Release 6 + ISO1 MT/dm6) (dm6) 90 | id: dm6 91 | source: ucsc 92 | doi: 93 | version: 94 | checksum: 95 | blob: 96 | indexers: 97 | - genome 98 | skiplist: 99 | - bfast 100 | - dbkey: Ecoli-O157-H7-Sakai 101 | description: "Escherichia coli O157-H7 Sakai" 102 | id: Ecoli-O157-H7-Sakai 103 | source: https://swift.rc.nectar.org.au:8888/v1/AUTH_377/public/COMP90014/Assignment1/Ecoli-O157_H7-Sakai-chr.fna 104 | doi: 105 | version: 106 | checksum: 107 | blob: 108 | indexers: 109 | - genome 110 | skiplist: 111 | - bfast 112 | - dbkey: Salm-enterica-Newport 113 | description: "Salmonella enterica subsp. enterica serovar Newport str. USMARC-S3124.1" 114 | id: Salm-enterica-Newport 115 | source: NC_021902 116 | doi: 117 | version: 118 | checksum: 119 | blob: 120 | indexers: 121 | - genome 122 | skiplist: 123 | - bfast 124 | ``` 125 | 126 | ## Testing 127 | 128 | This repo can be tested on a machine with Docker installed, by a user with Docker privileges. Be warned, however, that some of the genomes require a large amount of RAM (>64 GB) to index. 129 | 130 | It should work just by cloning the repo to the machine, modifying the environment variables in the `run_builder.sh` script to suit, and then running it. 131 | 132 | ## Other data types 133 | 134 | Work has been done on some of the other data types, tools, and data managers, such as those that operate on multiple genomes at once (e.g. Busco, MetaPhlAn). These can be found in the `older_attempts` directory along with an appropriate README. 135 | ## How to use the reference data 136 | 137 | If you want to use the reference data, please have a look at our [ansible-role](https://github.com/galaxyproject/ansible-cvmfs) 138 | and the [example playbook](https://github.com/usegalaxy-eu/cvmfs-example).
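As a minimal sketch of what this looks like in practice — assuming the variable names documented by the `galaxyproject.cvmfs` role (`cvmfs_role`, `galaxy_cvmfs_repos_enabled`, `cvmfs_quota_limit`); check the role's documentation before use — a client playbook might be:

```yaml
# Mount the Galaxy project CVMFS repositories on a Galaxy server (sketch).
- hosts: galaxyservers
  become: true
  vars:
    cvmfs_role: client                        # act as a CVMFS client, not a Stratum 0/1
    galaxy_cvmfs_repos_enabled: config-repo   # pull repository/key configuration from the Galaxy CVMFS config repo
    cvmfs_quota_limit: 4000                   # local client cache size in MB
  roles:
    - galaxyproject.cvmfs
```

Once the client is configured, the data published by this repository is available under `/cvmfs/idc.galaxyproject.org`.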
139 | 140 | -------------------------------------------------------------------------------- /ansible/.pass: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ -z "$IDC_VAULT_PASS" ]; then 4 | echo 'WARNING: $IDC_VAULT_PASS is unset, prompting for password' >&2 5 | echo -n 'Vault password: ' >&2 6 | stty -echo 7 | read IDC_VAULT_PASS 8 | stty echo 9 | echo '' >&2 10 | fi 11 | 12 | echo "$IDC_VAULT_PASS" 13 | -------------------------------------------------------------------------------- /ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | 3 | # default inventory file 4 | inventory = inventory.yaml 5 | 6 | # use pass for vault 7 | vault_password_file = ./.pass 8 | 9 | # include things from galaxyproject/ansible-common-roles 10 | roles_path = roles 11 | collections_paths = collections 12 | 13 | # use openssh so that we can persist connections 14 | transport = ssh 15 | 16 | # make error messages readable 17 | stdout_callback = yaml 18 | 19 | [ssh_connection] 20 | 21 | # enable pipelining with OpenSSH 22 | pipelining = True 23 | 24 | # These are necessary for cloud instances 25 | #pipelining = False 26 | #ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no 27 | 28 | # This verbosity is not needed 29 | #[diff] 30 | #always = True 31 | -------------------------------------------------------------------------------- /ansible/files/idc/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | email='idc@galaxyproject.org' 4 | username='idc' 5 | password='PBKDF2$sha256$100000$XhmbiqICQVhoO+7z$kdb1UThcjcvljNvpdCCUVYU9EZwG2sQG' 6 | database='idc' 7 | sleep_time=5 8 | sleep_count=30 9 | 10 | sql=" 11 | INSERT INTO galaxy_user 12 | (create_time, update_time, email, username, password, last_password_change, external, deleted, purged, active) 13 | VALUES 14 | (NOW(), NOW(), '$email', '$username', '$password', NOW(), false, false, false, true) 15 | " 16 | 17 | count=0 18 | while [ $(psql -At -c "SELECT EXISTS (SELECT relname FROM pg_class WHERE relname = 'galaxy_user')" "$database") = 'f' ]; do 19 | echo "waiting for galaxy_user table..." 
20 | count=$((count + 1)) 21 | [ $count -lt $sleep_count ] || { echo "timed out"; exit 1; } 22 | sleep $sleep_time 23 | done 24 | 25 | if [ $(psql -At -c "SELECT count(*) FROM galaxy_user WHERE username = '$username'" "$database") -eq 0 ]; then 26 | psql -c "$sql" "$database" 27 | fi 28 | -------------------------------------------------------------------------------- /ansible/files/idc/data_manager_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ${path} 11 | ${dbkey}/seq/${path} 12 | 13 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/seq/${path} 14 | abspath 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | ${len_path} 25 | ${value}/len/${len_path} 26 | 27 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${value}/len/${len_path} 28 | abspath 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | ${dbkey}/bowtie2_index/${value} 43 | 44 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path} 45 | abspath 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path} 57 | abspath 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | ${dbkey}/bwa_mem_index/${value} 72 | 73 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bwa_mem_index/${value}/${path} 74 | abspath 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | ${path} 87 | ${value}/seq/${path} 88 | 89 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${value}/seq/${path} 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${value}/seq/${path} 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${value}/seq/${path} 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | ${dbkey}/picard_index/${value} 121 | 122 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/picard_index/${value}/${path} 123 | abspath 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | ${dbkey}/sam_indexes/${value} 138 | 139 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/sam_indexes/${value}/${path} 140 | abspath 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | ${dbkey}/hisat2_index/${value} 155 | 156 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/hisat2_index/${value}/${path} 157 | abspath 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 174 | rnastar/${version}/${dbkey}/${value}/${path} 175 | 176 | ${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}/${path} 177 | abspath 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | ${dbkey}/bowtie_index 194 | 195 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie_index/${path} 196 | abspath 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | ${dbkey}/kallisto_index/${value} 211 | 212 | ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/kallisto_index/${value}/${path} 213 | abspath 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | snpEff/v4_3/data 238 | 239 | ${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/v4_3/data 240 | abspath 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | > 261 | ${path} 262 | plant_tribes/scaffolds/${value} 263 | 264 | ${GALAXY_DATA_MANAGER_DATA_PATH}/plant_tribes/scaffolds/${value} 265 | abspath 266 | 267 | 268 | 269 | 270 | 271 | 272 | 
273 | 274 | 275 | 276 | 277 | 278 | ${path}/taxonomy 279 | ncbi_taxonomy/${value} 280 | 281 | ${GALAXY_DATA_MANAGER_DATA_PATH}/ncbi_taxonomy/${value} 282 | abspath 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | ${path}/accession2taxid 293 | ncbi_accession2taxid/${value} 294 | 295 | ${GALAXY_DATA_MANAGER_DATA_PATH}/ncbi_accession2taxid/${value} 296 | abspath 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | gemini/${version}/${dbkey}/${value} 311 | 312 | ${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/${version}/${dbkey}/${value}/ 313 | abspath 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | ${path} 326 | kraken2_databases/${path} 327 | 328 | ${GALAXY_DATA_MANAGER_DATA_PATH}/kraken2_databases/${path} 329 | abspath 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | -------------------------------------------------------------------------------- /ansible/files/idc/workflow_schedulers_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ansible/group_vars/idc_builders.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | tailscale_authkey: !vault | 4 | $ANSIBLE_VAULT;1.1;AES256 5 | 35346435633731353836346266636239366333343532373365393332613035353436373632363235 6 | 3035623864306265396366636637306531383162346637620a613064383461306134373035306663 7 | 35663562373035653262343066623332366166643866313562373136393339306165303437363833 8 | 3664366566353137360a613666333664343837336132333430343636333738663464306161346639 9 | 31636366643837386431636265386662663362623232643766333436393862633838616531306630 10 | 33396438633639636634306362353462376233633265393233393962343934343230616632333134 11 | 313765363637303239646162306161663662 12 | 13 | minio_access_key: idc 14 | minio_secret_key: !vault | 15 | $ANSIBLE_VAULT;1.1;AES256 16 | 63343861656633656332623062386662613835383031396531383961623866333734383934323361 17 | 3232616334626430333463346330363432366137613337390a356264386363303266353964376566 18 | 32373933663436646566633237313135626265393936656332373633383236616534643864366532 19 | 6130616466666665330a393833373965623365333239303039383061363334663031646632316333 20 | 30396263346464306233646466323162653437663134363261653239366630643935383964666533 21 | 3032646166333162663934623635336630323035336639313163 22 | 23 | galaxy_database_password: !vault | 24 | $ANSIBLE_VAULT;1.1;AES256 25 | 61353439363266313231353362333636633830353237653338643765313963373434303365653734 26 | 6466336130346536303136386165326665663965313365630a313564326131356237333232623637 27 | 31656534323962383662663734363430373931646166646663313430366630373561346532353536 28 | 3763316333343132640a363639663636396665313831323430363038633630613433373230663936 29 | 61333338623663323432313765613332643530643862383230363033623237313263623162613164 30 | 3532653230333665313032633133613836653031323965343234 31 | 32 | os_key_name: idc 33 | 34 | os_image: usegalaxy-node 35 | os_flavor: m3.small 36 | os_security_groups: [default, ssh-only] 37 | 38 | host_groups: 39 | - name: idc 40 | gid: 808 41 | 42 | host_users: 43 | - name: idc 44 | home: /home/idc 45 | uid: 808 46 | group: idc 47 | shell: /bin/sh 48 | 49 | host_directories: 50 | - path: /jetstream2/scratch/idc 51 | owner: idc 52 | group: idc 53 | mode: "0755" 54 | 55 | nginx_flavor: core 56 | 
nginx_enable_default_server: false 57 | nginx_servers: 58 | - idc-build 59 | 60 | galaxy_layout: root-dir 61 | galaxy_root: /srv/galaxy 62 | galaxy_user: 63 | name: idc 64 | galaxy_server_dir: /cvmfs/main.galaxyproject.org/galaxy 65 | galaxy_venv_dir: /cvmfs/main.galaxyproject.org/venv 66 | galaxy_config_dir: "{{ galaxy_root }}/config" 67 | galaxy_shed_tools_dir: /jetstream2/scratch/idc/shed_tools 68 | galaxy_manage_clone: false 69 | galaxy_manage_download: false 70 | galaxy_manage_existing: false 71 | galaxy_manage_paths: true 72 | galaxy_manage_static_setup: true 73 | galaxy_manage_mutable_setup: true 74 | galaxy_manage_database: false 75 | galaxy_fetch_dependencies: false 76 | galaxy_build_client: false 77 | galaxy_backup_configfiles: false 78 | galaxy_manage_gravity: true 79 | 80 | galaxy_privsep_dirs: 81 | - "{{ galaxy_config_dir }}" 82 | - "{{ galaxy_config_dir }}/tpv" 83 | galaxy_config_files: 84 | - src: files/idc/workflow_schedulers_conf.xml 85 | dest: "{{ galaxy_config_dir }}/workflow_schedulers_conf.xml" 86 | - src: files/idc/data_manager_conf.xml 87 | dest: "{{ galaxy_config_dir }}/data_manager_conf.xml" 88 | galaxy_config_templates: 89 | - src: templates/idc/tpv/idc.yaml.j2 90 | dest: "{{ galaxy_config_dir }}/tpv/idc.yaml" 91 | - src: templates/idc/object_store_conf.xml.j2 92 | dest: "{{ galaxy_config_dir }}/object_store_conf.xml" 93 | galaxy_config: 94 | gravity: 95 | process_manager: systemd 96 | galaxy_user: idc 97 | galaxy_group: idc 98 | galaxy_root: "{{ galaxy_server_dir }}" 99 | virtualenv: "{{ galaxy_venv_dir }}" 100 | gunicorn: 101 | bind: localhost:8080 102 | workers: 1 103 | #handlers: 104 | # handler: 105 | # processes: 1 106 | # pools: 107 | # - job-handlers 108 | # - workflow-schedulers 109 | galaxy: 110 | server_name: idc-build 111 | database_connection: "postgresql://idc:{{ galaxy_database_password }}@galaxy-db-02/galaxy_main" 112 | install_database_connection: "sqlite:///{{ galaxy_mutable_data_dir }}/install.sqlite?isolation_level=IMMEDIATE" 113 | tool_data_table_config_path: /cvmfs/idc.galaxyproject.org/config/tool_data_table_conf.xml 114 | data_manager_config_file: "{{ galaxy_config_dir }}/data_manager_conf.xml" 115 | #shed_data_manager_config_file: /cvmfs/main.galaxyproject.org/config/shed_data_manager_conf.xml 116 | conda_auto_init: false 117 | conda_auto_install: false 118 | allow_user_creation: false 119 | #require_login: true 120 | admin_users: 121 | - idc@galaxyproject.org 122 | - nate+test@bx.psu.edu 123 | file_path: /jetstream2/scratch/idc/objects 124 | job_working_directory: /jetstream2/scratch/idc/jobs 125 | object_store_config_file: "{{ galaxy_config_dir }}/object_store_conf.xml" 126 | object_store_store_by: uuid 127 | workflow_schedulers_config_file: "{{ galaxy_config_dir }}/workflow_schedulers_conf.xml" 128 | container_resolvers: 129 | - type: cached_mulled_singularity 130 | cache_directory: /cvmfs/singularity.galaxyproject.org/all 131 | cache_directory_cacher_type: dir_mtime 132 | #- type: mulled_singularity 133 | # cache_directory: /jetstream2/scratch/idc/singularity 134 | - type: explicit_singularity 135 | cleanup_job: never 136 | job_config: 137 | runners: 138 | local: 139 | load: galaxy.jobs.runners.local:LocalJobRunner 140 | workers: 1 141 | slurm: 142 | load: galaxy.jobs.runners.slurm:SlurmJobRunner 143 | workers: 2 144 | drmaa_library_path: /usr/lib64/libdrmaa.so.1 145 | handling: 146 | assign: 147 | - db-self 148 | execution: 149 | default: tpv_dispatcher 150 | environments: 151 | tpv_dispatcher: 152 | runner: dynamic 153 | type: python 
154 | function: map_tool_to_destination 155 | rules_module: tpv.rules 156 | tpv_config_files: 157 | - "{{ galaxy_config_dir }}/tpv/idc.yaml" 158 | 159 | # need to exclude shed_data_managers_conf.xml or else the role tries to create/chown its parent 160 | #galaxy_mutable_config_files: 161 | # - src: "shed_tool_data_table_conf.xml" 162 | # dest: "{{ galaxy_config_merged[galaxy_app_config_section].shed_tool_data_table_config }}" 163 | -------------------------------------------------------------------------------- /ansible/group_vars/js2.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # these should work but don't appear to 4 | #export OS_APPLICATION_CREDENTIAL_ID= 5 | #export OS_APPLICATION_CREDENTIAL_SECRET= 6 | 7 | os_application_credential_id: !vault | 8 | $ANSIBLE_VAULT;1.1;AES256 9 | 39623061366238613763346532303434393965386566646362303334343863333663396562333338 10 | 3934386632396262613733306561333462613735356237620a663937386264366132643033376633 11 | 33626638623132646230653962343832656362393335303562383265623233636234653332386436 12 | 6437313033353063310a633532393132643830303238396232313137306234316162326336326365 13 | 34346239663462396363636535313739623539393564346263363266343333653637303562393339 14 | 6263313934356130616163313563303362353937663234363634 15 | 16 | os_application_credential_secret: !vault | 17 | $ANSIBLE_VAULT;1.1;AES256 18 | 32373838393538356139376130666461346265346130373166613432343134343565643262653362 19 | 3062643765616631613365306363303635333136333433310a633434616262666632623265313564 20 | 32653762303366353861666465656630303838613930313136376565333162643165653937393963 21 | 6562383562383935640a356164303962613331356366353462356539616261663261306339346139 22 | 61356632666431643132626563303466373938623262633361646337303666366531386161636536 23 | 62336338636265326138356463393534393765323933623631313431373462326461383764316331 24 | 33353636306338636531303837636561353031663966643961363739306131303034333836343434 25 | 64333364366365343033313166663236393761653436396234316563663264656637643233333334 26 | 3834 27 | 28 | cloud_id: js2 29 | clouds_yaml: 30 | clouds: 31 | js2: 32 | auth: 33 | application_credential_id: "{{ os_application_credential_id }}" 34 | application_credential_secret: "{{ os_application_credential_secret }}" 35 | auth_url: 'https://js2.jetstream-cloud.org:5000/v3/' 36 | region_name: "IU" 37 | interface: "public" 38 | identity_api_version: 3 39 | auth_type: "v3applicationcredential" 40 | 41 | os_cloud_id: "{{ cloud_id }}" 42 | os_clouds_yaml: "{{ clouds_yaml }}" 43 | 44 | os_name: "{{ inventory_hostname_short }}" 45 | 46 | os_nics: 47 | - net-name: "usegalaxy" 48 | -------------------------------------------------------------------------------- /ansible/inventory.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # idc hosts 3 | 4 | all: 5 | hosts: 6 | idc-build.galaxyproject.org: 7 | ansible_user: rocky 8 | ansible_become: true 9 | ansible_ssh_private_key_file: ~/.ssh/id_rsa_idc_jetstream2_cvmfs 10 | ansible_ssh_common_args: -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no 11 | children: 12 | idc_builders: 13 | hosts: 14 | idc-build.galaxyproject.org: 15 | js2: 16 | hosts: 17 | idc-build.galaxyproject.org: 18 | -------------------------------------------------------------------------------- /ansible/playbook-launch.yaml: -------------------------------------------------------------------------------- 1 | - name: Spawn instance 2 | hosts: 
idc_builders 3 | gather_facts: no 4 | environment: 5 | OS_CLOUD: "{{ os_cloud_id }}" 6 | OS_IDENTITY_API_VERSION: '3' 7 | roles: 8 | - openstack 9 | 10 | - name: Configure Galaxy 11 | hosts: idc_builders 12 | tasks: 13 | - name: Install slurm-drmaa 14 | dnf: 15 | name: https://github.com/natefoo/slurm-drmaa/releases/download/1.1.4/slurm-drmaa-1.1.4-22.05.el9.x86_64.rpm 16 | disable_gpg_check: true 17 | - import_role: 18 | name: galaxyproject.general.virtual 19 | - import_role: 20 | name: galaxyproject.general.packages 21 | - import_role: 22 | name: galaxyproject.general.users 23 | - import_role: 24 | name: galaxyproject.general.paths 25 | - import_role: 26 | name: galaxyproject.galaxy 27 | - name: Flush handlers 28 | meta: flush_handlers 29 | - import_role: 30 | name: galaxyproject.nginx 31 | - import_role: 32 | name: artis3n.tailscale 33 | -------------------------------------------------------------------------------- /ansible/playbook-teardown.yaml: -------------------------------------------------------------------------------- 1 | - name: Clean up instance 2 | hosts: idc_builders 3 | tasks: 4 | - name: Remove scratch directory 5 | file: 6 | path: /jetstream2/scratch/idc 7 | state: absent 8 | diff: false 9 | - name: Create logs directory 10 | file: 11 | path: /jetstream2/scratch/idc-build-logs 12 | owner: rocky 13 | group: rocky 14 | mode: "0755" 15 | state: directory 16 | - name: Dump gunicorn logs 17 | shell: journalctl -u galaxy-gunicorn.service > /jetstream2/scratch/idc-build-logs/gunicorn-{{ lookup('ansible.builtin.env', 'BUILD_NUMBER') | default('unknown') }}.log 18 | become_user: rocky 19 | - name: Log out from tailnet 20 | command: tailscale logout 21 | 22 | - name: Destroy instance 23 | hosts: idc_builders 24 | gather_facts: no 25 | environment: 26 | OS_CLOUD: "{{ os_cloud_id }}" 27 | OS_IDENTITY_API_VERSION: '3' 28 | tasks: 29 | - import_tasks: roles/openstack/tasks/secrets.yml 30 | - name: Destroy instance 31 | os_server: 32 | name: "{{ os_name | default(inventory_hostname) }}" 33 | state: absent 34 | delegate_to: localhost 35 | - import_tasks: roles/openstack/tasks/clean.yml 36 | -------------------------------------------------------------------------------- /ansible/requirements.txt: -------------------------------------------------------------------------------- 1 | # the Python requirements for running these playbooks 2 | ansible<7 3 | dnspython 4 | openstacksdk<0.70 5 | -------------------------------------------------------------------------------- /ansible/requirements.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | roles: 4 | - name: artis3n.tailscale 5 | version: v4.2.2 6 | - name: galaxyproject.galaxy 7 | version: 0.10.15 8 | - name: galaxyproject.nginx 9 | version: 0.7.1 10 | - name: galaxyproject.postgresql 11 | version: 1.1.2 12 | - name: galaxyproject.postgresql_objects 13 | version: 1.2.0 14 | 15 | collections: 16 | - name: galaxyproject.general 17 | version: 1.0.0 18 | -------------------------------------------------------------------------------- /ansible/roles/openstack/tasks/clean.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Remove clouds.yaml 4 | file: 5 | path: "{{ inventory_dir }}/clouds.yaml" 6 | state: absent 7 | delegate_to: localhost 8 | run_once: true 9 | become: false 10 | 11 | - name: Remove additional secrets 12 | file: 13 | path: "{{ inventory_dir }}/{{ item.dest }}" 14 | state: absent 15 | loop: "{{ os_secrets | default([])
}}" 16 | delegate_to: localhost 17 | run_once: yes 18 | no_log: yes 19 | -------------------------------------------------------------------------------- /ansible/roles/openstack/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - import_tasks: secrets.yml 4 | 5 | - include_tasks: spawn.yml 6 | when: 7 | - os_image is defined 8 | - os_flavor is defined 9 | - os_key_name is defined 10 | - os_nics is defined 11 | - os_security_groups is defined 12 | -------------------------------------------------------------------------------- /ansible/roles/openstack/tasks/secrets.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Write clouds.yaml 4 | copy: 5 | content: "{{ os_clouds_yaml | to_nice_yaml }}" 6 | dest: "{{ playbook_dir }}/clouds.yaml" 7 | mode: "0400" 8 | delegate_to: localhost 9 | run_once: true 10 | no_log: yes 11 | 12 | - name: Write additional secrets 13 | copy: 14 | content: "{{ item.content }}" 15 | dest: "{{ inventory_dir }}/{{ item.dest }}" 16 | mode: "0400" 17 | loop: "{{ os_secrets | default([]) }}" 18 | delegate_to: localhost 19 | run_once: yes 20 | loop_control: 21 | label: "{{ item.dest }}" 22 | -------------------------------------------------------------------------------- /ansible/roles/openstack/tasks/spawn.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Instance spawn block 4 | block: 5 | 6 | - name: Create volumes 7 | os_volume: 8 | cloud: "{{ os_cloud_id }}" 9 | display_name: "{{ item.name }}" 10 | size: "{{ item.size }}" 11 | delegate_to: localhost 12 | loop: "{{ os_volumes | default([]) }}" 13 | 14 | - name: Spawn new instance 15 | os_server: 16 | cloud: "{{ os_cloud_id }}" 17 | name: "{{ os_name | default(inventory_hostname) }}" 18 | image: "{{ os_image }}" 19 | flavor: "{{ os_flavor }}" 20 | key_name: "{{ os_key_name }}" 21 | nics: "{{ os_nics }}" 22 | security_groups: "{{ os_security_groups }}" 23 | auto_ip: "{{ os_auto_ip | default(omit) }}" 24 | floating_ips: "{{ os_floating_ips | default(omit) }}" 25 | meta: "group={{ group_names[0] }}" 26 | userdata: | 27 | #cloud-config 28 | package_upgrade: false 29 | delegate_to: localhost 30 | register: __spawn_result 31 | 32 | - name: Attach volumes to instances 33 | os_server_volume: 34 | cloud: "{{ os_cloud_id }}" 35 | server: "{{ os_name | default(inventory_hostname) }}" 36 | volume: "{{ item.name }}" 37 | delegate_to: localhost 38 | loop: "{{ os_volumes | default([]) }}" 39 | register: __attach_result 40 | 41 | - name: Set volume device(s) fact 42 | set_fact: 43 | __os_volume_devices: "{{ __attach_result.results | selectattr('attachments', 'defined') | map(attribute='attachments') | flatten | map(attribute='device') | list }}" 44 | 45 | - name: Set filesystems fact 46 | set_fact: 47 | filesystems: "{{ (filesystems | default([])) + [{'dev': item[1], 'fstype': item[0]}] }}" 48 | loop: "{{ (os_volumes | map(attribute='fstype')) | zip(__os_volume_devices) | list }}" 49 | when: os_volumes is defined 50 | 51 | - name: Update inventory with spawned instance IP 52 | set_fact: 53 | ansible_host: "{{ __spawn_result.server.public_v4 or __spawn_result.server.private_v4 }}" 54 | #delegate_to: localhost 55 | 56 | - name: Log IP addresses 57 | debug: 58 | var: ansible_host 59 | 60 | - name: Wait for instance to become accessible 61 | wait_for_connection: 62 | timeout: 120 63 | 64 | - name: Set authorized keys 65 | authorized_key: 66 | user: "{{ 
ansible_user }}" 67 | key: "{% for authorized in os_admin_users %}{{ ssh_public_keys[authorized] ~ '\n' }}{% endfor %}" 68 | exclusive: "{{ os_admin_users_exclusive | default('yes') }}" 69 | when: os_admin_users is defined 70 | 71 | always: 72 | 73 | - import_tasks: clean.yml 74 | 75 | rescue: 76 | 77 | - fail: 78 | msg: Exiting due to previous failure 79 | -------------------------------------------------------------------------------- /ansible/templates/idc/object_store_conf.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /ansible/templates/idc/tpv/idc.yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | destinations: 4 | jetstream2: 5 | runner: slurm 6 | max_accepted_cores: 64 7 | max_accepted_mem: 244 8 | context: 9 | partition: priority 10 | time: 24:00:00 11 | params: 12 | native_specification: "--nodes=1 --ntasks={cores} --mem={round(mem*1024)} --time={time} --partition={partition}" 13 | tmp_dir: true 14 | outputs_to_working_directory: true 15 | singularity_enabled: true 16 | singularity_volumes: "$galaxy_root:ro,$tool_directory:ro,$working_directory:rw,$job_directory:rw,/cvmfs/main.galaxyproject.org:ro,/cvmfs/idc.galaxyproject.org:ro" 17 | singularity_default_container_id: /cvmfs/singularity.galaxyproject.org/all/python:3.8.3 18 | env: 19 | - execute: ulimit -c 0 20 | - name: _JAVA_OPTIONS 21 | value: $_JAVA_OPTIONS -Djava.io.tmpdir=$TEMP 22 | - name: SINGULARITYENV__JAVA_OPTIONS 23 | value: $_JAVA_OPTIONS 24 | - name: SINGULARITYENV_TERM 25 | value: vt100 26 | - name: SINGULARITYENV_LC_ALL 27 | value: C 28 | - name: SINGULARITYENV_TEMP 29 | value: $TEMP 30 | - name: SINGULARITYENV_TMPDIR 31 | value: $TEMP 32 | 33 | global: 34 | default_inherits: _default 35 | 36 | tools: 37 | _default: 38 | abstract: true 39 | cores: 1 40 | mem: cores * 2.89 41 | env: 42 | - name: _JAVA_OPTIONS 43 | value: $_JAVA_OPTIONS -Xmx{round(mem*0.9*1024)}m -Xms256m 44 | .*bowtie2_index_builder_data_manager.*: 45 | cores: 16 46 | .*bwa_mem_index_builder_data_manager.*: 47 | cores: 12 48 | mem: 48 49 | .*twobit_builder_data_manager.*: 50 | cores: 16 51 | mem: 36 52 | .*picard_index_builder_data_manager.*: 53 | mem: 12 54 | .*hisat2_index_builder_data_manager.*: 55 | cores: 10 56 | mem: 120 57 | .*rna_star_index_builder_data_manager.*: 58 | cores: 10 59 | mem: 120 60 | .*kraken2_build_database.*: 61 | cores: 64 62 | mem: 240 63 | -------------------------------------------------------------------------------- /ansible/templates/nginx/idc-build.j2: -------------------------------------------------------------------------------- 1 | ## 2 | ## This file is maintained by Ansible - CHANGES WILL BE OVERWRITTEN 3 | ## 4 | 5 | upstream galaxy { 6 | server 127.0.0.1:8080; 7 | } 8 | 9 | server { 10 | listen *:80 default_server; 11 | server_name idc-build.galaxyproject.org; 12 | 13 | access_log syslog:server=unix:/dev/log; 14 | error_log syslog:server=unix:/dev/log; 15 | 16 | location / { 17 | proxy_pass http://galaxy; 18 | proxy_redirect off; 19 | proxy_set_header Host $host; 20 | proxy_set_header X-Real-IP $remote_addr; 21 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 22 | proxy_set_header X-Forwarded-Proto $scheme; 23 | } 24 | 25 | location /static { 26 | alias {{ galaxy_server_dir }}/static; 27 | } 28 | 29 | location /robots.txt { 30 | alias {{ galaxy_server_dir 
}}/static/robots.txt; 31 | } 32 | 33 | location /favicon.ico { 34 | alias {{ galaxy_server_dir }}/static/favicon.ico; 35 | } 36 | 37 | location /_x_accel_redirect { 38 | internal; 39 | alias /; 40 | add_header X-Frame-Options SAMEORIGIN; 41 | add_header X-Content-Type-Options nosniff; 42 | } 43 | 44 | location /jetstream2/scratch/idc/objects { 45 | internal; 46 | alias /jetstream2/scratch/idc/objects; 47 | } 48 | } 49 | 50 | # vim: set filetype=nginx 51 | -------------------------------------------------------------------------------- /config/tool_data_table_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | value, dbkey, name, path 4 | 5 |
6 | 7 | value, name, len_path 8 | 9 |
10 | 11 | value, dbkey, name, path 12 | 13 |
14 | 15 | value, dbkey, name, path 16 | 17 |
18 | 19 | value, path 20 | 21 |
22 | 23 | value, name, path 24 | 25 |
26 | 27 | type, value, path 28 | 29 |
30 | 31 | value, dbkey, name, path 32 | 33 |
34 | 35 | value, dbkey, name, path 36 | 37 |
38 | 39 | value, dbkey, name, path 40 | 41 |
42 | 43 | value, dbkey, name, path, with_gene_model, version 44 | 45 |
46 | 47 | value, dbkey, name, path 48 | 49 |
50 | 51 | value, dbkey, name, path 52 | 53 |
54 | 55 | key, version, value, name, path 56 | 57 |
58 | 59 | key, version, genome, value, name 60 | 61 |
62 | 63 | key, version, value, name 64 | 65 |
66 | 67 | value, name, path, description 68 | 69 |
70 | 71 | value, name, path 72 | 73 |
74 | 75 | value, name, path 76 | 77 |
78 | 79 | value, dbkey, version, name, path 80 | 81 |
82 | 83 | value, name, path 84 | 85 |
86 | 87 | value, name, path 88 | 89 |
90 |
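The cvmfs_current_managed files that follow record the data manager tools and genomes that have already been published to CVMFS. As a rough illustration only (this command is not part of the repository, and GALAXY_URL, ADMIN_USER, and ADMIN_PASS are placeholders), a tool list in this format can be installed into a build Galaxy with ephemeris's shed-tools, in the same style run_builder.sh uses further below:

# Hypothetical example: install the listed data manager tools with ephemeris.
# GALAXY_URL, ADMIN_USER, and ADMIN_PASS are placeholders for the build server
# URL and admin credentials; they are not defined anywhere in this repo.
pip install ephemeris
shed-tools install \
    -t cvmfs_current_managed/cvmfs_data_managers.yml \
    -g "$GALAXY_URL" \
    -u "$ADMIN_USER" \
    -p "$ADMIN_PASS"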
91 | -------------------------------------------------------------------------------- /cvmfs_current_managed/cvmfs_data_managers.yml: -------------------------------------------------------------------------------- 1 | tools: 2 | - name: data_manager_manual 3 | owner: iuc 4 | revisions: 5 | - 6524e573d9c2 6 | tool_panel_section_label: None 7 | tool_shed_url: toolshed.g2.bx.psu.edu 8 | 9 | - name: data_manager_bowtie2_index_builder 10 | owner: devteam 11 | revisions: 12 | - e87aeff2cf88 13 | - 83da94c0e4a6 14 | tool_panel_section_label: None 15 | tool_shed_url: toolshed.g2.bx.psu.edu 16 | tags: 17 | - genome 18 | 19 | - name: data_manager_gatk_picard_index_builder 20 | owner: devteam 21 | revisions: 22 | - b31f1fcb203c 23 | tool_panel_section_label: None 24 | tool_shed_url: toolshed.g2.bx.psu.edu 25 | tags: 26 | - genome 27 | 28 | - name: data_manager_hisat_index_builder 29 | owner: devteam 30 | revisions: 31 | - ba11fef120cd 32 | tool_panel_section_label: None 33 | tool_shed_url: toolshed.g2.bx.psu.edu 34 | tags: 35 | - genome 36 | 37 | - name: data_manager_fetch_genome_dbkeys_all_fasta 38 | owner: devteam 39 | revisions: 40 | - b1bc53e9bbc5 41 | - 776bb1b478a0 42 | tool_panel_section_label: None 43 | tool_shed_url: toolshed.g2.bx.psu.edu 44 | tags: 45 | - fetch_source 46 | 47 | - name: data_manager_snpeff 48 | owner: iuc 49 | revisions: 50 | - a6400027d849 51 | - 9ac823a8b328 52 | tool_panel_section_label: None 53 | tool_shed_url: toolshed.g2.bx.psu.edu 54 | tags: 55 | - snpeff 56 | 57 | - name: data_manager_plant_tribes_scaffolds_downloader 58 | owner: iuc 59 | revisions: 60 | - 5833ef61c1f8 61 | tool_panel_section_label: None 62 | tool_shed_url: toolshed.g2.bx.psu.edu 63 | tags: 64 | - plant_source 65 | 66 | - name: data_manager_twobit_builder 67 | owner: devteam 68 | revisions: 69 | - 74b09c8e5f6e 70 | - 9946bc39c834 71 | tool_panel_section_label: None 72 | tool_shed_url: toolshed.g2.bx.psu.edu 73 | tags: 74 | - genome 75 | 76 | - name: data_manager_diamond_database_builder 77 | owner: bgruening 78 | revisions: 79 | - ce62d0912b10 80 | tool_panel_section_label: None 81 | tool_shed_url: toolshed.g2.bx.psu.edu 82 | tags: 83 | - genome 84 | 85 | - name: data_manager_fetch_ncbi_taxonomy 86 | owner: devteam 87 | revisions: 88 | - 926847693e4d 89 | tool_panel_section_label: None 90 | tool_shed_url: toolshed.g2.bx.psu.edu 91 | tags: 92 | - tax_source 93 | 94 | - name: data_manager_picard_index_builder 95 | owner: devteam 96 | revisions: 97 | - b99040168706 98 | - 00491eabe22b 99 | tool_panel_section_label: None 100 | tool_shed_url: toolshed.g2.bx.psu.edu 101 | tags: 102 | - genome 103 | 104 | - name: data_manager_gemini_database_downloader 105 | owner: iuc 106 | revisions: 107 | - f57426daa04d 108 | tool_panel_section_label: None 109 | tool_shed_url: toolshed.g2.bx.psu.edu 110 | tags: 111 | - gemini 112 | 113 | - name: data_manager_snpeff 114 | owner: iuc 115 | revisions: 116 | - a6e6e8415b7f 117 | tool_panel_section_label: None 118 | tool_shed_url: testtoolshed.g2.bx.psu.edu 119 | tags: 120 | - snpeff 121 | 122 | - name: data_manager_bwa_mem_index_builder 123 | owner: devteam 124 | revisions: 125 | - 46066df8813d 126 | - cb0147ade868 127 | tool_panel_section_label: None 128 | tool_shed_url: toolshed.g2.bx.psu.edu 129 | tags: 130 | - genome 131 | 132 | - name: data_manager_fetch_genome_all_fasta 133 | owner: devteam 134 | revisions: 135 | - fb744a070bee 136 | tool_panel_section_label: None 137 | tool_shed_url: toolshed.g2.bx.psu.edu 138 | tags: 139 | - deprecated 140 | 141 | - name: 
data_manager_sam_fasta_index_builder 142 | owner: devteam 143 | revisions: 144 | - 2a1ac1abc3f7 145 | tool_panel_section_label: None 146 | tool_shed_url: toolshed.g2.bx.psu.edu 147 | tags: 148 | - genome 149 | 150 | - name: data_manager_hisat2_index_builder 151 | owner: iuc 152 | revisions: 153 | - d210e1f185bd 154 | - 98a60a4cfb9a 155 | tool_panel_section_label: None 156 | tool_shed_url: toolshed.g2.bx.psu.edu 157 | tags: 158 | - genome 159 | 160 | - name: data_manager_star_index_builder 161 | owner: iuc 162 | revisions: 163 | - 6ef6520f14fc 164 | - 50ca9af6db2e 165 | tool_panel_section_label: None 166 | tool_shed_url: toolshed.g2.bx.psu.edu 167 | tags: 168 | - genome 169 | 170 | - name: data_manager_bowtie_index_builder 171 | owner: iuc 172 | revisions: 173 | - 86e9af693a33 174 | - 35c1e3785c90 175 | tool_panel_section_label: None 176 | tool_shed_url: toolshed.g2.bx.psu.edu 177 | tags: 178 | - genome 179 | 180 | - name: data_manager_kallisto_index_builder 181 | owner: iuc 182 | revisions: 183 | - 6843a0db2da0 184 | tool_panel_section_label: None 185 | tool_shed_url: toolshed.g2.bx.psu.edu 186 | tags: 187 | - genome 188 | -------------------------------------------------------------------------------- /cvmfs_current_managed/cvmfs_managed_genomes.yml: -------------------------------------------------------------------------------- 1 | - genomes: 2 | - dbkey: hg19_rCRS_pUC18_phiX174 3 | description: Homo sapiens (hg19 with mtDNA replaced with rCRS, and containing pUC18 4 | and phiX174) 5 | id: hg19_rCRS_pUC18_phiX174 6 | indexers: 7 | - data_manager_twobit_builder 8 | - data_manager_fetch_genome_all_fasta 9 | - data_manager_bowtie2_index_builder 10 | - data_manager_bwa_mem_index_builder 11 | - data_manager_sam_fasta_index_builder 12 | - data_manager_hisat2_index_builder 13 | - data_manager_picard_index_builder 14 | - data_manager_star_index_builder 15 | 16 | - dbkey: rn6 17 | description: Rat Jul. 2014 (RGSC 6.0/rn6) (rn6) 18 | id: rn6 19 | indexers: 20 | - data_manager_twobit_builder 21 | - data_manager_fetch_genome_all_fasta 22 | - data_manager_bowtie2_index_builder 23 | - data_manager_bwa_mem_index_builder 24 | - data_manager_sam_fasta_index_builder 25 | - data_manager_hisat2_index_builder 26 | - data_manager_picard_index_builder 27 | - data_manager_star_index_builder 28 | 29 | - dbkey: dm6 30 | description: D. melanogaster Aug. 2014 (BDGP Release 6 + ISO1 MT/dm6) (dm6) 31 | id: dm6 32 | indexers: 33 | - data_manager_twobit_builder 34 | - data_manager_fetch_genome_all_fasta 35 | - data_manager_bowtie2_index_builder 36 | - data_manager_bwa_mem_index_builder 37 | - data_manager_sam_fasta_index_builder 38 | - data_manager_hisat2_index_builder 39 | - data_manager_picard_index_builder 40 | - data_manager_star_index_builder 41 | 42 | - dbkey: musFur1 43 | description: Ferret Apr. 2011 (MusPutFur1.0/musFur1) (musFur1) 44 | id: musFur1 45 | indexers: 46 | - data_manager_twobit_builder 47 | - data_manager_fetch_genome_all_fasta 48 | - data_manager_bowtie2_index_builder 49 | - data_manager_bwa_mem_index_builder 50 | - data_manager_sam_fasta_index_builder 51 | - data_manager_hisat2_index_builder 52 | - data_manager_picard_index_builder 53 | 54 | - dbkey: nomLeu3 55 | description: Gibbon Oct. 
2012 (GGSC Nleu3.0/nomLeu3) (nomLeu3) 56 | id: nomLeu3 57 | indexers: 58 | - data_manager_twobit_builder 59 | - data_manager_fetch_genome_all_fasta 60 | - data_manager_bowtie2_index_builder 61 | - data_manager_bwa_mem_index_builder 62 | - data_manager_sam_fasta_index_builder 63 | - data_manager_hisat2_index_builder 64 | - data_manager_picard_index_builder 65 | 66 | - dbkey: cerSim1 67 | description: White rhinoceros May 2012 (CerSimSim1.0/cerSim1) (cerSim1) 68 | id: cerSim1 69 | indexers: 70 | - data_manager_twobit_builder 71 | - data_manager_fetch_genome_all_fasta 72 | - data_manager_bowtie2_index_builder 73 | - data_manager_bwa_mem_index_builder 74 | - data_manager_sam_fasta_index_builder 75 | - data_manager_hisat2_index_builder 76 | - data_manager_picard_index_builder 77 | 78 | - dbkey: danRer10 79 | description: Zebrafish Sep. 2014 (GRCz10/danRer10) (danRer10) 80 | id: danRer10 81 | indexers: 82 | - data_manager_twobit_builder 83 | - data_manager_fetch_genome_all_fasta 84 | - data_manager_bowtie2_index_builder 85 | - data_manager_sam_fasta_index_builder 86 | - data_manager_picard_index_builder 87 | 88 | - dbkey: papAnu2 89 | description: Baboon Mar. 2012 (Baylor Panu_2.0/papAnu2) (papAnu2) 90 | id: papAnu2 91 | indexers: 92 | - data_manager_twobit_builder 93 | - data_manager_fetch_genome_all_fasta 94 | - data_manager_bowtie2_index_builder 95 | - data_manager_bwa_mem_index_builder 96 | - data_manager_sam_fasta_index_builder 97 | - data_manager_hisat2_index_builder 98 | - data_manager_picard_index_builder 99 | 100 | - dbkey: bosTau8 101 | description: Cow Jun. 2014 (Bos_taurus_UMD_3.1.1/bosTau8) (bosTau8) 102 | id: bosTau8 103 | indexers: 104 | - data_manager_twobit_builder 105 | - data_manager_fetch_genome_all_fasta 106 | - data_manager_bowtie2_index_builder 107 | - data_manager_bwa_mem_index_builder 108 | - data_manager_sam_fasta_index_builder 109 | - data_manager_hisat2_index_builder 110 | - data_manager_picard_index_builder 111 | 112 | - dbkey: melUnd1 113 | description: 'Budgerigar (Melopsittacus undulatus): melUnd1' 114 | id: melUnd1 115 | indexers: 116 | - data_manager_twobit_builder 117 | - data_manager_bowtie2_index_builder 118 | - data_manager_bwa_mem_index_builder 119 | - data_manager_hisat2_index_builder 120 | 121 | - dbkey: allMis1 122 | description: American alligator Aug. 2012 (allMis0.2/allMis1) (allMis1) 123 | id: allMis1 124 | indexers: 125 | - data_manager_twobit_builder 126 | - data_manager_fetch_genome_all_fasta 127 | - data_manager_bowtie2_index_builder 128 | - data_manager_bwa_mem_index_builder 129 | - data_manager_sam_fasta_index_builder 130 | - data_manager_hisat2_index_builder 131 | - data_manager_picard_index_builder 132 | 133 | - dbkey: vicPac1 134 | description: Alpaca Jul. 2008 (Broad/vicPac1) (vicPac1) 135 | id: vicPac1 136 | indexers: 137 | - data_manager_twobit_builder 138 | - data_manager_fetch_genome_all_fasta 139 | - data_manager_bowtie2_index_builder 140 | - data_manager_bwa_mem_index_builder 141 | - data_manager_sam_fasta_index_builder 142 | - data_manager_hisat2_index_builder 143 | - data_manager_picard_index_builder 144 | 145 | - dbkey: vicPac2 146 | description: Alpaca Mar. 
2013 (Vicugna_pacos-2.0.1/vicPac2) (vicPac2) 147 | id: vicPac2 148 | indexers: 149 | - data_manager_twobit_builder 150 | - data_manager_fetch_genome_all_fasta 151 | - data_manager_bowtie2_index_builder 152 | - data_manager_bwa_mem_index_builder 153 | - data_manager_sam_fasta_index_builder 154 | - data_manager_hisat2_index_builder 155 | - data_manager_picard_index_builder 156 | 157 | - dbkey: gadMor1 158 | description: Atlantic cod May 2010 (Genofisk GadMor_May2010/gadMor1) (gadMor1) 159 | id: gadMor1 160 | indexers: 161 | - data_manager_twobit_builder 162 | - data_manager_fetch_genome_all_fasta 163 | - data_manager_bowtie2_index_builder 164 | - data_manager_bwa_mem_index_builder 165 | - data_manager_sam_fasta_index_builder 166 | - data_manager_hisat2_index_builder 167 | - data_manager_picard_index_builder 168 | 169 | - dbkey: dasNov3 170 | description: Armadillo Dec. 2011 (Baylor/dasNov3) (dasNov3) 171 | id: dasNov3 172 | indexers: 173 | - data_manager_twobit_builder 174 | - data_manager_fetch_genome_all_fasta 175 | - data_manager_bowtie2_index_builder 176 | - data_manager_bwa_mem_index_builder 177 | - data_manager_sam_fasta_index_builder 178 | - data_manager_hisat2_index_builder 179 | - data_manager_picard_index_builder 180 | 181 | - dbkey: panPan1 182 | description: Bonobo May. 2012 (Max-Planck/panPan1) (panPan1) 183 | id: panPan1 184 | indexers: 185 | - data_manager_twobit_builder 186 | - data_manager_fetch_genome_all_fasta 187 | - data_manager_bowtie2_index_builder 188 | - data_manager_bwa_mem_index_builder 189 | - data_manager_sam_fasta_index_builder 190 | - data_manager_hisat2_index_builder 191 | - data_manager_picard_index_builder 192 | 193 | - dbkey: felCat8 194 | description: Cat Nov. 2014 (ICGSC Felis_catus_8.0/felCat8) (felCat8) 195 | id: felCat8 196 | indexers: 197 | - data_manager_twobit_builder 198 | - data_manager_fetch_genome_all_fasta 199 | - data_manager_bowtie2_index_builder 200 | - data_manager_bwa_mem_index_builder 201 | - data_manager_sam_fasta_index_builder 202 | - data_manager_hisat2_index_builder 203 | - data_manager_picard_index_builder 204 | 205 | - dbkey: aptMan1 206 | description: Brown Kiwi Jun. 2015 (MPI-EVA AptMant0/aptMan1) (aptMan1) 207 | id: aptMan1 208 | indexers: 209 | - data_manager_twobit_builder 210 | - data_manager_fetch_genome_all_fasta 211 | - data_manager_bowtie2_index_builder 212 | - data_manager_bwa_mem_index_builder 213 | - data_manager_sam_fasta_index_builder 214 | - data_manager_hisat2_index_builder 215 | - data_manager_picard_index_builder 216 | 217 | - dbkey: bosTau7 218 | description: 'Cow (Bos taurus): bosTau7' 219 | id: bosTau7 220 | indexers: 221 | - data_manager_twobit_builder 222 | - data_manager_hisat2_index_builder 223 | 224 | - dbkey: apiMel4 225 | description: A. mellifera 04 Nov 2010 (Amel_4.5/apiMel4) (apiMel4) 226 | id: apiMel4 227 | indexers: 228 | - data_manager_twobit_builder 229 | - data_manager_fetch_genome_all_fasta 230 | - data_manager_bowtie2_index_builder 231 | - data_manager_bwa_mem_index_builder 232 | - data_manager_sam_fasta_index_builder 233 | - data_manager_hisat2_index_builder 234 | - data_manager_picard_index_builder 235 | - data_manager_star_index_builder 236 | 237 | - dbkey: Amel_4.5 238 | description: A. mellifera Nov. 
2010 (GCF_000002195.4/Amel_4.5) (Amel_4.5) 239 | id: Amel_4.5 240 | indexers: 241 | - data_manager_twobit_builder 242 | - data_manager_fetch_genome_all_fasta 243 | - data_manager_bowtie2_index_builder 244 | - data_manager_bwa_mem_index_builder 245 | - data_manager_sam_fasta_index_builder 246 | - data_manager_hisat2_index_builder 247 | - data_manager_picard_index_builder 248 | - data_manager_star_index_builder 249 | 250 | - dbkey: taeGut2 251 | description: Zebra finch Feb. 2013 (WashU taeGut324/taeGut2) 252 | id: taeGut2 253 | indexers: 254 | - data_manager_fetch_genome_all_fasta 255 | 256 | - dbkey: criGri1 257 | description: Chinese hamster Jul. 2013 (C_griseus_v1.0/criGri1) (criGri1) 258 | id: criGri1 259 | indexers: 260 | - data_manager_fetch_genome_all_fasta 261 | 262 | - dbkey: latCha1 263 | description: Coelacanth Aug. 2011 (Broad/latCha1) (latCha1) 264 | id: latCha1 265 | indexers: 266 | - data_manager_fetch_genome_all_fasta 267 | 268 | - dbkey: sacCer3 269 | description: 'Yeast (Saccharomyces cerevisiae): sacCer3' 270 | id: sacCer3 271 | indexers: 272 | - data_manager_bowtie2_index_builder 273 | - data_manager_bwa_mem_index_builder 274 | - data_manager_hisat2_index_builder 275 | - data_manager_star_index_builder 276 | 277 | - dbkey: sacCer2 278 | description: 'Yeast (Saccharomyces cerevisiae): sacCer2' 279 | id: sacCer2 280 | indexers: 281 | - data_manager_bowtie2_index_builder 282 | - data_manager_bwa_mem_index_builder 283 | - data_manager_hisat2_index_builder 284 | - data_manager_star_index_builder 285 | 286 | - dbkey: Schizosaccharomyces_pombe_1.1 287 | description: 'Fission Yeast (Schizosaccharomyces pombe): Schizosaccharomyces_pombe_1.1' 288 | id: Schizosaccharomyces_pombe_1.1 289 | indexers: 290 | - data_manager_bowtie2_index_builder 291 | - data_manager_bwa_mem_index_builder 292 | - data_manager_hisat2_index_builder 293 | 294 | - dbkey: galGal4 295 | description: Chicken (Nov 2011, Gallus gallus) 296 | id: galgal4 297 | indexers: 298 | - data_manager_bowtie2_index_builder 299 | 300 | - dbkey: loxAfr1 301 | description: 'Elephant (Loxodonta africana africana): loxAfr1' 302 | id: loxAfr1 303 | indexers: 304 | - data_manager_bowtie2_index_builder 305 | - data_manager_bwa_mem_index_builder 306 | - data_manager_hisat2_index_builder 307 | 308 | - dbkey: loxAfr3 309 | description: 'Elephant (Loxodonta africana africana): loxAfr3' 310 | id: loxAfr3 311 | indexers: 312 | - data_manager_bowtie2_index_builder 313 | - data_manager_bwa_mem_index_builder 314 | - data_manager_hisat2_index_builder 315 | 316 | - dbkey: oryza_sativa_japonica_nipponbare_IRGSP4.0 317 | description: 'Rice (Oryza sativa L. ssp. japonica var. 
Nipponbare): IRGSP4.0' 318 | id: oryza_sativa_japonica_nipponbare_IRGSP4.0 319 | indexers: 320 | - data_manager_bowtie2_index_builder 321 | 322 | - dbkey: melGal1 323 | description: 'Turkey (Meleagris gallopavo): melGal1' 324 | id: melGal1 325 | indexers: 326 | - data_manager_bowtie2_index_builder 327 | - data_manager_bwa_mem_index_builder 328 | - data_manager_hisat2_index_builder 329 | 330 | - dbkey: equCab1 331 | description: 'Horse (Equus caballus): equCab1' 332 | id: equCab1 333 | indexers: 334 | - data_manager_bowtie2_index_builder 335 | - data_manager_bwa_mem_index_builder 336 | - data_manager_hisat2_index_builder 337 | 338 | - dbkey: rheMac2 339 | description: 'Rhesus Macaque (Macaca mulatta): rheMac2' 340 | id: rheMac2 341 | indexers: 342 | - data_manager_bowtie2_index_builder 343 | - data_manager_bwa_mem_index_builder 344 | - data_manager_hisat2_index_builder 345 | 346 | - dbkey: galGal4 347 | description: 'Chicken (Gallus gallus): galGal4' 348 | id: galGal4 349 | indexers: 350 | - data_manager_bowtie2_index_builder 351 | - data_manager_bwa_mem_index_builder 352 | - data_manager_hisat2_index_builder 353 | 354 | - dbkey: equCab2 355 | description: 'Horse (Equus caballus): equCab2' 356 | id: equCab2 357 | indexers: 358 | - data_manager_bowtie2_index_builder 359 | - data_manager_bwa_mem_index_builder 360 | - data_manager_hisat2_index_builder 361 | 362 | - dbkey: rheMac3 363 | description: 'Rhesus Macaque (Macaca mulatta): rheMac3' 364 | id: rheMac3 365 | indexers: 366 | - data_manager_bowtie2_index_builder 367 | - data_manager_bwa_mem_index_builder 368 | - data_manager_hisat2_index_builder 369 | 370 | - dbkey: eschColi_K12 371 | description: 'Escherichia coli (str. K-12 substr. MG1655): eschColi_K12' 372 | id: eschColi_K12 373 | indexers: 374 | - data_manager_bowtie2_index_builder 375 | - data_manager_bwa_mem_index_builder 376 | - data_manager_hisat2_index_builder 377 | 378 | - dbkey: galGal3 379 | description: 'Chicken (Gallus gallus): galGal3 Canonical' 380 | id: galGal3canon 381 | indexers: 382 | - data_manager_bowtie2_index_builder 383 | - data_manager_bwa_mem_index_builder 384 | 385 | - dbkey: galGal3 386 | description: 'Chicken (Gallus gallus): galGal3 Full' 387 | id: galGal3full 388 | indexers: 389 | - data_manager_bowtie2_index_builder 390 | - data_manager_bwa_mem_index_builder 391 | 392 | - dbkey: canFam3 393 | description: 'Dog (Canis lupus familiaris): canFam3' 394 | id: canFam3 395 | indexers: 396 | - data_manager_bwa_mem_index_builder 397 | - data_manager_hisat2_index_builder 398 | 399 | - dbkey: ce9 400 | description: 'C. 
elegans (WS210): ce9' 401 | id: ce9 402 | indexers: 403 | - data_manager_bwa_mem_index_builder 404 | - data_manager_hisat2_index_builder 405 | - data_manager_star_index_builder 406 | 407 | - dbkey: canFam2 408 | description: 'Dog (Canis lupus familiaris): canFam2' 409 | id: canFam2 410 | indexers: 411 | - data_manager_bwa_mem_index_builder 412 | - data_manager_hisat2_index_builder 413 | 414 | - dbkey: susScr2 415 | description: 'Pig (Sus scrofa): susScr2' 416 | id: susScr2 417 | indexers: 418 | - data_manager_bwa_mem_index_builder 419 | - data_manager_hisat2_index_builder 420 | 421 | - dbkey: bosTauMd3 422 | description: 'Cow (Bos taurus): bosTauMd3' 423 | id: bosTauMd3 424 | indexers: 425 | - data_manager_bwa_mem_index_builder 426 | - data_manager_hisat2_index_builder 427 | 428 | - dbkey: papHam1 429 | description: 'Baboon (Papio anubis): papHam1' 430 | id: papHam1 431 | indexers: 432 | - data_manager_bwa_mem_index_builder 433 | - data_manager_hisat2_index_builder 434 | 435 | - dbkey: otoGar3 436 | description: 'Bushbaby (Otolemur garnetti): otoGar3' 437 | id: otoGar3 438 | indexers: 439 | - data_manager_bwa_mem_index_builder 440 | - data_manager_hisat2_index_builder 441 | 442 | - dbkey: felCat5 443 | description: 'Cat (Felis catus): felCat5' 444 | id: felCat5 445 | indexers: 446 | - data_manager_bwa_mem_index_builder 447 | - data_manager_hisat2_index_builder 448 | 449 | - dbkey: otoGar1 450 | description: 'Bushbaby (Otolemur garnetti): otoGar1' 451 | id: otoGar1 452 | indexers: 453 | - data_manager_bwa_mem_index_builder 454 | - data_manager_hisat2_index_builder 455 | 456 | - dbkey: panTro4 457 | description: 'Chimpanzee (Pan troglodytes): panTro4' 458 | id: panTro4 459 | indexers: 460 | - data_manager_bwa_mem_index_builder 461 | - data_manager_hisat2_index_builder 462 | 463 | - dbkey: turTru2 464 | description: 'Dolphin (Tursiops truncatus): turTru2' 465 | id: turTru2 466 | indexers: 467 | - data_manager_bwa_mem_index_builder 468 | - data_manager_hisat2_index_builder 469 | 470 | - dbkey: panTro3 471 | description: 'Chimpanzee (Pan troglodytes): panTro3 Canonical' 472 | id: panTro3canon 473 | indexers: 474 | - data_manager_bwa_mem_index_builder 475 | - data_manager_hisat2_index_builder 476 | 477 | - dbkey: panTro3 478 | description: 'Chimpanzee (Pan troglodytes): panTro3 Full' 479 | id: panTro3full 480 | indexers: 481 | - data_manager_bwa_mem_index_builder 482 | - data_manager_hisat2_index_builder 483 | 484 | - dbkey: Araly1 485 | description: 'Arabidopsis lyrata: Araly1' 486 | id: Araly1 487 | indexers: 488 | - data_manager_sam_fasta_index_builder 489 | 490 | - dbkey: dm3 491 | description: 'Fruit Fly (Drosophila melanogaster): dm3' 492 | id: dm3 493 | indexers: 494 | - data_manager_hisat2_index_builder 495 | - data_manager_star_index_builder 496 | 497 | - dbkey: mm10 498 | description: 'Mouse (Mus Musculus): mm10' 499 | id: mm10 500 | indexers: 501 | - data_manager_hisat2_index_builder 502 | - data_manager_star_index_builder 503 | 504 | - dbkey: galGal3 505 | description: 'Chicken (Gallus gallus): galGal3 Full' 506 | id: galGal3 507 | indexers: 508 | - data_manager_hisat2_index_builder 509 | 510 | - dbkey: mm9 511 | description: 'Mouse (Mus musculus): mm9' 512 | id: mm9 513 | indexers: 514 | - data_manager_hisat2_index_builder 515 | - data_manager_star_index_builder 516 | 517 | - dbkey: hg_g1k_v37 518 | description: 'Human (Homo sapiens) (b37): hg_g1k_v37' 519 | id: hg_g1k_v37 520 | indexers: 521 | - data_manager_hisat2_index_builder 522 | 523 | - dbkey: susScr3 524 | description: 'Pig 
(Sus scrofa): susScr3' 525 | id: susScr3 526 | indexers: 527 | - data_manager_hisat2_index_builder 528 | 529 | - dbkey: ce10 530 | description: 'C. elegans (WS220): ce10' 531 | id: ce10 532 | indexers: 533 | - data_manager_hisat2_index_builder 534 | - data_manager_star_index_builder 535 | 536 | - dbkey: hg19 537 | description: 'Human (Homo sapiens) (b37): hg19' 538 | id: hg19 539 | indexers: 540 | - data_manager_hisat2_index_builder 541 | - data_manager_star_index_builder 542 | 543 | - dbkey: hg19 544 | description: 'Human (Homo sapiens) (b37): hg19 Canonical Female' 545 | id: hg19female 546 | indexers: 547 | - data_manager_hisat2_index_builder 548 | 549 | - dbkey: hg19 550 | description: 'Human (Homo sapiens) (b37): hg19 Canonical' 551 | id: hg19canon 552 | indexers: 553 | - data_manager_hisat2_index_builder 554 | 555 | - dbkey: hg38 556 | description: 'Human (Homo sapiens) (b38): hg38 Canonical Female' 557 | id: hg38female 558 | indexers: 559 | - data_manager_hisat2_index_builder 560 | 561 | - dbkey: hg38 562 | description: 'Human (Homo sapiens) (b38): hg38 Canonical' 563 | id: hg38canon 564 | indexers: 565 | - data_manager_hisat2_index_builder 566 | 567 | - dbkey: hg38 568 | description: 'Human (Homo sapiens) (b38): hg38' 569 | id: hg38 570 | indexers: 571 | - data_manager_hisat2_index_builder 572 | - data_manager_star_index_builder 573 | 574 | - description: NCBI-2015-10-05 575 | id: ncbi-2015-10-05 576 | indexers: 577 | - data_manager_fetch_ncbi_taxonomy 578 | 579 | - description: 22Gv1.1 580 | id: 22Gv1.1 581 | indexers: 582 | - data_manager_plant_tribes_scaffolds_downloader 583 | 584 | - description: 26 plant genomes (Angiosperms clusters, version 2.0) 585 | id: 26Gv2.0 586 | indexers: 587 | - data_manager_plant_tribes_scaffolds_downloader 588 | 589 | - description: 37 plant genomes (Angiosperms clusters, version 1.0) 590 | id: 37Gv1.0 591 | indexers: 592 | - data_manager_plant_tribes_scaffolds_downloader 593 | 594 | - dbkey: rn5 595 | description: 'Rat (Rattus norvegicus): rn5' 596 | id: rn5 597 | indexers: 598 | - data_manager_star_index_builder 599 | -------------------------------------------------------------------------------- /data_managers.yml: -------------------------------------------------------------------------------- 1 | data_manager_fetch_genome_dbkeys_all_fasta: 2 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_dbkeys_all_fasta/data_manager_fetch_genome_all_fasta_dbkey/0.0.4' 3 | tags: 4 | - fetch_source 5 | data_manager_bowtie2_index_builder: 6 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bowtie2_index_builder/bowtie2_index_builder_data_manager/2.3.4.3' 7 | tags: 8 | - genome 9 | data_manager_bwa_mem_index_builder: 10 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bwa_mem_index_builder/bwa_mem_index_builder_data_manager/0.0.3' 11 | tags: 12 | - genome 13 | parameters: 14 | index_algorithm: bwtsw 15 | data_manager_hisat_index_builder: 16 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_hisat_index_builder/hisat_index_builder_data_manager/1.0.0' 17 | tags: 18 | - genome 19 | data_manager_twobit_builder: 20 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2' 21 | tags: 22 | - genome 23 | data_manager_picard_index_builder: 24 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_picard_index_builder/picard_index_builder_data_manager/2.7.1' 25 | tags: 26 | - genome 27 | data_manager_sam_fasta_index_builder: 28 | tool_id: 
'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_sam_fasta_index_builder/sam_fasta_index_builder/0.0.2' 29 | tags: 30 | - genome 31 | data_manager_hisat2_index_builder: 32 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_hisat2_index_builder/hisat2_index_builder_data_manager/2.0.5' 33 | tags: 34 | - genome 35 | data_manager_star_index_builder: 36 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_star_index_builder/rna_star_index_builder_data_manager/2.7.4a+galaxy1' 37 | tags: 38 | - genome 39 | data_manager_bowtie_index_builder: 40 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_bowtie_index_builder/bowtie_color_space_index_builder_data_manager/0.0.2' 41 | tags: 42 | - genome 43 | data_manager_kallisto_index_builder: 44 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_kallisto_index_builder/kallisto_index_builder_data_manager/0.43.1' 45 | tags: 46 | - genome 47 | data_manager_snpeff: 48 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_snpeff/data_manager_snpeff_databases/4.3r' 49 | tags: 50 | - snpeff 51 | data_manager_plant_tribes_scaffolds_downloader: 52 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_plant_tribes_scaffolds_downloader/data_manager_plant_tribes_scaffolds_download/1.1.0' 53 | tags: 54 | - plant_source 55 | data_manager_fetch_ncbi_taxonomy: 56 | tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_ncbi_taxonomy/ncbi_taxonomy_fetcher/1.0.0' 57 | tags: 58 | - tax_source 59 | data_manager_gemini_database_downloader: 60 | tool_id: 'toolshed.g2.bx.psu.edu/repos/iuc/data_manager_gemini_database_downloader/data_manager_gemini_download/0.20.1' 61 | tags: 62 | - gemini 63 | data_manager_build_kraken2_database: 64 | tool_id: toolshed.g2.bx.psu.edu/repos/iuc/data_manager_build_kraken2_database/kraken2_build_database/2.1.2+galaxy1 65 | tags: 66 | - kraken2 67 | data_manager_build_bracken_database: 68 | tool_id: toolshed.g2.bx.psu.edu/repos/iuc/data_manager_build_bracken_database/bracken_build_database/2.8+galaxy0 69 | parameters: 70 | kraken_db: "{{ item.id }}" 71 | check_prebuilt: 72 | prebuilt: yes 73 | tags: 74 | - bracken 75 | -------------------------------------------------------------------------------- /genomes.yml: -------------------------------------------------------------------------------- 1 | genomes: 2 | - dbkey: dm6 3 | description: # set from UCSC 4 | id: dm6 5 | source: ucsc 6 | doi: 7 | version: 8 | checksum: 9 | blob: 10 | indexers: 11 | - data_manager_bowtie2_index_builder 12 | - data_manager_bwa_mem_index_builder 13 | - data_manager_twobit_builder 14 | - data_manager_picard_index_builder 15 | - data_manager_sam_fasta_index_builder 16 | - data_manager_hisat2_index_builder 17 | - data_manager_star_index_builder 18 | #- data_manager_bowtie_index_builder 19 | #- data_manager_kallisto_index_builder 20 | skiplist: 21 | - bfast 22 | - dbkey: danRer10 23 | description: # set from UCSC 24 | id: danRer10 25 | source: ucsc 26 | doi: 27 | version: 28 | checksum: 29 | blob: 30 | indexers: 31 | - data_manager_bowtie2_index_builder 32 | - data_manager_bwa_mem_index_builder 33 | - data_manager_twobit_builder 34 | - data_manager_picard_index_builder 35 | - data_manager_sam_fasta_index_builder 36 | - data_manager_hisat2_index_builder 37 | - data_manager_star_index_builder 38 | #- data_manager_bowtie_index_builder 39 | #- data_manager_kallisto_index_builder 40 | skiplist: 41 | - bfast 42 | - dbkey: sacCer3 43 | description: # set from UCSC 44 | id: sacCer3 45 | source: ucsc 46 | doi: 47 | version: 48 | checksum: 
49 | blob: 50 | indexers: 51 | - data_manager_bowtie2_index_builder 52 | - data_manager_bwa_mem_index_builder 53 | - data_manager_twobit_builder 54 | - data_manager_picard_index_builder 55 | - data_manager_sam_fasta_index_builder 56 | - data_manager_hisat2_index_builder 57 | - data_manager_star_index_builder 58 | #- data_manager_bowtie_index_builder 59 | #- data_manager_kallisto_index_builder 60 | skiplist: 61 | - bfast 62 | - dbkey: Ecoli-O157-H7-Sakai 63 | description: "Escherichia coli O157-H7 Sakai" 64 | id: Ecoli-O157-H7-Sakai 65 | source: https://swift.rc.nectar.org.au:8888/v1/AUTH_377/public/COMP90014/Assignment1/Ecoli-O157_H7-Sakai-chr.fna 66 | doi: 67 | version: 68 | checksum: 1d769fcb47f631c359e0b9407155e34325b223fba4d1f208f7ad8c353f5ab560 69 | blob: 70 | indexers: 71 | - data_manager_bowtie2_index_builder 72 | - data_manager_bwa_mem_index_builder 73 | - data_manager_twobit_builder 74 | - data_manager_picard_index_builder 75 | - data_manager_sam_fasta_index_builder 76 | - data_manager_hisat2_index_builder 77 | - data_manager_star_index_builder 78 | #- data_manager_bowtie_index_builder 79 | #- data_manager_kallisto_index_builder 80 | skiplist: 81 | - bfast 82 | - dbkey: Salm-enterica-Newport 83 | description: "Salmonella enterica subsp. enterica serovar Newport str. USMARC-S3124.1" 84 | id: Salm-enterica-Newport 85 | source: NC_021902 86 | doi: 87 | version: 88 | checksum: 8d557e48aa3268afd0177a537a9ae396e6a6c123e4e0d5386bc1fa5a18c3054b 89 | blob: "Extra information for this genome is at: https://www.ncbi.nlm.nih.gov/genome/152?genome_assembly_id=299243" 90 | indexers: 91 | - data_manager_bowtie2_index_builder 92 | - data_manager_bwa_mem_index_builder 93 | - data_manager_twobit_builder 94 | - data_manager_picard_index_builder 95 | - data_manager_sam_fasta_index_builder 96 | - data_manager_hisat2_index_builder 97 | - data_manager_star_index_builder 98 | #- data_manager_bowtie_index_builder 99 | #- data_manager_kallisto_index_builder 100 | skiplist: 101 | - bfast 102 | -------------------------------------------------------------------------------- /run_builder.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | : ${GALAXY_DOCKER_IMAGE:="quay.io/bgruening/galaxy"} 6 | : ${GALAXY_PORT:="8080"} 7 | : ${GALAXY_DEFAULT_ADMIN_USER:="admin@galaxy.org"} 8 | : ${GALAXY_DEFAULT_ADMIN_PASSWORD:="password"} 9 | : ${EXPORT_DIR:="/mnt/data/export/"} 10 | : ${DATA_MANAGER_DATA_PATH:="${EXPORT_DIR}/data_manager"} 11 | 12 | : ${PLANEMO_PROFILE_NAME:="wxflowtest"} 13 | : ${PLANEMO_SERVE_DATABASE_TYPE:="postgres"} 14 | 15 | GALAXY_URL="http://localhost:$GALAXY_PORT" 16 | 17 | git diff --name-only $TRAVIS_COMMIT_RANGE -- '*.yml' '*.yaml' > changed_files.txt 18 | echo "Following files have changed." 19 | cat changed_files.txt 20 | 21 | if [ ! -f .venv ]; then 22 | virtualenv .venv 23 | . .venv/bin/activate 24 | pip install -U pip 25 | pip install ephemeris 26 | fi 27 | 28 | echo 'ephemeris installed' 29 | 30 | . 
.venv/bin/activate 31 | 32 | mkdir -p ${DATA_MANAGER_DATA_PATH} 33 | 34 | sudo cp scripts/job_conf.xml ${EXPORT_DIR}/job_conf.xml 35 | 36 | docker run -d --rm -v ${EXPORT_DIR}:/export/ -e GALAXY_CONFIG_JOB_CONFIG_FILE=/export/job_conf.xml -e GALAXY_CONFIG_GALAXY_DATA_MANAGER_DATA_PATH=/export/data_manager/ -e GALAXY_CONFIG_WATCH_TOOL_DATA_DIR=True -p 8080:80 --name idc_builder ${GALAXY_DOCKER_IMAGE} 37 | 38 | echo 'Waitng for Galaxy' 39 | 40 | galaxy-wait -g ${GALAXY_URL} 41 | 42 | chmod 0777 ${DATA_MANAGER_DATA_PATH} 43 | 44 | 45 | #if [ -s changed_files.txt ] 46 | #then 47 | # for FILE in `cat changed_files.txt`; 48 | # do 49 | # if [[ $FILE == *"data-managers"* ]]; then 50 | # #### RUN single data managers 51 | # shed-tools install -d $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 52 | # run-data-managers --config $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 53 | # elif [[ $FILE == *"idc-workflows"* ]]; then 54 | # #### RUN the pipline for new genome 55 | # shed-tools install -d $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 56 | # run-data-managers --config $FILE -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 57 | # fi 58 | # done 59 | #fi 60 | 61 | echo 'Installing Data Managers' 62 | # Install the data managers 63 | _idc-data-managers-to-tools 64 | shed-tools install -t tools.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 65 | 66 | echo 'Fetching new genomes' 67 | #Run make_fetch.py to build the fetch manager config file for ephemeris 68 | python scripts/make_fetch.py -g genomes.yml -x ${EXPORT_DIR}/galaxy-central/config/shed_data_manager_conf.xml 69 | #cat data_managers_fetch.yml genomes.yml > fetch.yml 70 | run-data-managers --config fetch.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 71 | 72 | echo 'Restarting Galaxy' 73 | #Restart Galaxy to reload the data tables 74 | docker exec idc_builder supervisorctl restart galaxy: 75 | galaxy-wait -g ${GALAXY_URL} 76 | sleep 20 77 | 78 | echo 'Building new indices' 79 | #Run the make_dm_genomes.py script to create the list of index builders and genomes and pass it to ephemeris 80 | python scripts/make_dm_genomes.py -d data_managers_tools.yml -x ${EXPORT_DIR}/galaxy-central/config/shed_data_manager_conf.xml -g genomes.yml 81 | run-data-managers --config dm_genomes.yml -g ${GALAXY_URL} -u $GALAXY_DEFAULT_ADMIN_USER -p $GALAXY_DEFAULT_ADMIN_PASSWORD 82 | 83 | 84 | ls -l ${DATA_MANAGER_DATA_PATH} 85 | 86 | rm fetch.yml 87 | rm dm_genomes.yml 88 | 89 | docker stop idc_builder 90 | -------------------------------------------------------------------------------- /scripts/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker stop idc_builder 4 | sudo rm -rf /mnt/data/export 5 | rm fetch.yml 6 | -------------------------------------------------------------------------------- /scripts/job_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | /usr/lib/slurm-drmaa/lib/libdrmaa.so 6 | true 7 | 8 | 9 | true 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | true 23 | --ntasks=32 --share 24 | 25 | 26 | 27 | true 28 | --ntasks=1 --share 29 | 30 | 31 | 32 | true 33 | --ntasks=2 --share 34 | 35 | 36 | 37 | true 38 | vanilla 39 | 40 | 41 | 42 | true 43 | vanilla 44 | 45 | 46 | 47 | 1 48 | 1 
49 | 1 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /scripts/make_dm_genomes.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | import yaml 4 | import xml.etree.ElementTree as ET 5 | import re 6 | import argparse 7 | 8 | 9 | def main(): 10 | 11 | VERSION = 0.1 12 | 13 | parser = argparse.ArgumentParser(description="") 14 | parser.add_argument( 15 | "-d", 16 | "--data_managers_file", 17 | required=True, 18 | help="The data managers tool .yml file.", 19 | ) 20 | parser.add_argument( 21 | "-x", 22 | "--shed_data_managers_conf_file", 23 | required=True, 24 | help="Path to the shed_data_managers_conf.xml file", 25 | ) 26 | parser.add_argument( 27 | "-g", "--genome_file", required=True, help="The genome yaml file to read." 28 | ) 29 | parser.add_argument( 30 | "-o", 31 | "--outfile", 32 | default="dm_genomes.yml", 33 | help="The name of the output file to produce.", 34 | ) 35 | parser.add_argument("--version", action="store_true") 36 | parser.add_argument("--verbose", action="store_true") 37 | 38 | args = parser.parse_args() 39 | 40 | if args.version: 41 | print("make_fetch.py version: %.1f" % VERSION) 42 | return 43 | 44 | # Set up the output dictionary 45 | out_conf = {"data_managers": [], "genomes": []} 46 | 47 | # Read in the data managers file and store the names in an array 48 | data_managers_tools = yaml.safe_load(open(args.data_managers_file, "r")) 49 | dms = [] 50 | for dm in data_managers_tools["tools"]: 51 | if "genome" in dm["tags"]: 52 | dms.append(dm["name"]) 53 | if args.verbose: 54 | print("Data managers array: %s" % dms) 55 | 56 | # Read in the shed_data_managers_conf.xml file and build a dictionary of 57 | # name, id and data tables to update and add them to 58 | # out_conf if they appear in dms 59 | tree = ET.parse(args.shed_data_managers_conf_file) 60 | root = tree.getroot() 61 | for data_manager in root: 62 | name = "" 63 | repo = "" 64 | tables = [] 65 | for tool in data_manager: 66 | if tool.tag == "tool": 67 | for x in tool: 68 | if x.tag == "id": 69 | name = x.text 70 | elif x.tag == "repository_name": 71 | repo = x.text 72 | elif tool.tag == "data_table": 73 | tables.append(tool.attrib["name"]) 74 | if repo in dms: 75 | dm = {} 76 | dm["id"] = name 77 | dm["params"] = [ 78 | {"all_fasta_source": "{{ item.id }}"}, 79 | {"sequence_name": "{{ item.name }}"}, 80 | {"sequence_id": "{{ item.id }}"}, 81 | ] 82 | if re.search("bwa", dm["id"]): 83 | dm["params"].append({"index_algorithm": "bwtsw"}) 84 | if re.search("color_space", dm["id"]): 85 | continue 86 | dm["items"] = "{{ genomes }}" 87 | dm["data_table_reload"] = tables 88 | out_conf["data_managers"].append(dm) 89 | 90 | # Read in the genome file. 
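    # (For reference: the genome file is this repository's genomes.yml, a
    # top-level "genomes" list whose entries carry dbkey, id, source, indexers,
    # and skiplist fields; the list is copied into the output config unchanged
    # so run-data-managers can iterate over it as "{{ genomes }}".)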
91 | genomes = yaml.safe_load(open(args.genome_file, "r")) 92 | 93 | out_conf["genomes"] = genomes["genomes"] 94 | 95 | with open(args.outfile, "w") as out: 96 | yaml.dump(out_conf, out, default_flow_style=False) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /scripts/make_fetch.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | import yaml 4 | import xml.etree.ElementTree as ET 5 | import re 6 | import argparse 7 | 8 | 9 | def main(): 10 | 11 | VERSION = 0.2 12 | 13 | parser = argparse.ArgumentParser(description="") 14 | parser.add_argument( 15 | "-g", "--genome_file", required=True, help="The genome yaml file to read." 16 | ) 17 | parser.add_argument( 18 | "-x", 19 | "--shed_data_managers_conf_file", 20 | required=True, 21 | help="Path to the shed_data_managers_conf.xml file", 22 | ) 23 | parser.add_argument( 24 | "-o", 25 | "--outfile", 26 | default="fetch.yml", 27 | help="The name of the output file to produce.", 28 | ) 29 | parser.add_argument("--version", action="store_true") 30 | parser.add_argument("--verbose", action="store_true") 31 | 32 | args = parser.parse_args() 33 | 34 | if args.version: 35 | print("make_fetch.py version: %.1f" % VERSION) 36 | return 37 | 38 | # Read in the shed_data_managers_tool file to get the id of the fetch dm tool. 39 | tree = ET.parse(args.shed_data_managers_conf_file) 40 | root = tree.getroot() 41 | fetch_dm_tool = "" 42 | for data_manager in root: 43 | for tool in data_manager: 44 | if tool.tag == "tool": 45 | for x in tool: 46 | if x.tag == "id": 47 | if re.search(r"fetch_genome", x.text): 48 | fetch_dm_tool = x.text 49 | 50 | # Read in the genome file. 51 | genomes = yaml.safe_load(open(args.genome_file, "r")) 52 | 53 | dm = {"data_managers": []} 54 | 55 | for genome in genomes["genomes"]: 56 | # make the start 57 | out = {"id": fetch_dm_tool} 58 | out["params"] = [] 59 | out["params"].append({"dbkey_source|dbkey": genome["id"]}) 60 | if genome["source"] == "ucsc": 61 | out["params"].append({"reference_source|reference_source_selector": "ucsc"}) 62 | out["params"].append({"reference_source|requested_dbkey": genome["id"]}) 63 | elif re.match("^[A-Z_]+[0-9.]+", genome["source"]): 64 | out["params"].append({"dbkey_source|dbkey_source_selector": "new"}) 65 | out["params"].append({"reference_source|reference_source_selector": "ncbi"}) 66 | out["params"].append( 67 | {"reference_source|requested_identifier": genome["source"]} 68 | ) 69 | out["params"].append({"sequence_name": genome["description"]}) 70 | out["params"].append({"sequence.id": genome["id"]}) 71 | elif re.match("^http", genome["source"]): 72 | out["params"].append({"dbkey_source|dbkey_source_selector": "new"}) 73 | out["params"].append({"reference_source|reference_source_selector": "url"}) 74 | out["params"].append({"reference_source|user_url": genome["source"]}) 75 | out["params"].append({"sequence_name": genome["description"]}) 76 | out["params"].append({"sequence.id": genome["id"]}) 77 | out["data_table_reload"] = ["all_fasta", "__dbkeys__"] 78 | 79 | dm["data_managers"].append(out) 80 | 81 | with open(args.outfile, "w") as out: 82 | yaml.dump(dm, out, default_flow_style=False) 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | --------------------------------------------------------------------------------
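Taken together, scripts/make_fetch.py and scripts/make_dm_genomes.py turn genomes.yml plus the installed data managers into run-data-managers configurations. The sketch below retraces the flow that run_builder.sh drives; GALAXY_URL, ADMIN_USER, ADMIN_PASS, and SHED_DM_CONF (the path to Galaxy's shed_data_manager_conf.xml) are assumptions, not values defined in this repository:

# 1. Build the fetch config (make_fetch.py writes fetch.yml by default) and
#    run the fetch data manager against the build Galaxy.
python scripts/make_fetch.py -g genomes.yml -x "$SHED_DM_CONF"
run-data-managers --config fetch.yml -g "$GALAXY_URL" -u "$ADMIN_USER" -p "$ADMIN_PASS"

# 2. Build the per-indexer config (make_dm_genomes.py writes dm_genomes.yml by
#    default) and run the index-building data managers. The -d argument takes
#    an ephemeris tools file with name/tags entries, as run_builder.sh passes.
python scripts/make_dm_genomes.py -d data_managers_tools.yml -x "$SHED_DM_CONF" -g genomes.yml
run-data-managers --config dm_genomes.yml -g "$GALAXY_URL" -u "$ADMIN_USER" -p "$ADMIN_PASS"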
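For completeness, the ansible/ directory earlier in this dump is what launches and destroys the throwaway build VM on Jetstream2. Below is a minimal sketch of driving it by hand, assuming the vault password lives in ansible/.pass (as the .pass entry in the tree suggests) and that ansible.cfg already points at inventory.yaml; the exact flags depend on the local setup:

cd ansible
pip install -r requirements.txt                         # constrained ansible + openstacksdk
ansible-galaxy role install -r requirements.yaml        # galaxyproject.galaxy, nginx, tailscale, ...
ansible-galaxy collection install -r requirements.yaml  # galaxyproject.general
# Add --vault-password-file .pass (and -i inventory.yaml) below if ansible.cfg
# does not already set them.
ansible-playbook playbook-launch.yaml                   # spawn the instance, deploy Galaxy, configure nginx/tailscale
# ... run the data manager builds against the new instance ...
ansible-playbook playbook-teardown.yaml                 # dump logs, leave the tailnet, destroy the instance
# New vaulted values (e.g. os_application_credential_id) can be generated with:
#   ansible-vault encrypt_string --vault-password-file .pass --name os_application_credential_id '<value>'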