├── .github └── workflows │ └── build-multi-pex.yml ├── .gitignore ├── .pylintrc ├── CITATION.cff ├── DISCLAIMER.md ├── LICENSE.md ├── Makefile ├── Makefile-create-blastdb-metadata ├── Makefile-gcp-elb-janitor ├── README.md ├── bin ├── aws-create-elastic-blast-janitor-role.sh ├── aws-delete-elastic-blast-janitor-role.sh ├── aws-describe-elastic-blast-janitor-role.sh ├── aws-get-auto-scaling-events.sh ├── aws-show-my-undeleted-searches.sh ├── blast-tuner.py ├── cleanup-stale-gcp-resources.py ├── create-blastdb-metadata.py ├── elastic-blast ├── fasta_split.py ├── gcp-setup-elastic-blast-janitor.sh ├── gcp-show-my-undeleted-searches.sh └── results2clustername.sh ├── docker-blast ├── Dockerfile ├── Dockerfile-build-from-local-sources ├── LICENSE.md ├── Makefile ├── README.md ├── awscloudbuild.yaml ├── cloudbuild.yaml ├── fasta-split ├── requirements.txt ├── splitq_download_db_search ├── test-docker-image-aws.yaml ├── test-docker-image-gcp.yaml └── test_data │ ├── Garbage_query_file.fa │ ├── pdbnt.nin │ ├── pdbnt.nnd │ ├── pdbnt.nni │ ├── pdbnt.nog │ ├── pdbnt.nos │ ├── pdbnt.not │ ├── pdbnt.ntf │ └── pdbnt.nto ├── docker-demo ├── .gcloudignore ├── Dockerfile ├── Dockerfile-build-from-local-sources ├── Makefile ├── Makefile-demo.mk ├── README.md ├── cloudbuild.yaml ├── elb-aws-blastn-mane-pdbnt.ini ├── elb-gcp-blastn-mane-pdbnt.ini ├── elb-gcp-blastp-coala-refseq-prot.ini ├── elb-gcp-blastx-mane-swissprot.ini ├── requirements.txt └── test-cloudbuild.yaml ├── docker-janitor ├── Dockerfile-build-from-local-sources.gcp ├── Dockerfile.gcp ├── Makefile ├── README.md ├── cloudbuild.yaml ├── cloudrun.yaml ├── elastic-blast-janitor.sh └── requirements.txt ├── docker-job-submit ├── .gitignore ├── Dockerfile-build-from-local-sources.aws ├── Dockerfile.aws ├── Dockerfile.gcp ├── Makefile ├── awscloudbuild.yaml ├── awscloudrun.yaml ├── cloud-job-submit.sh ├── cloudbuild.yaml ├── cloudrun.yaml ├── requirements.txt └── submit_jobs.py ├── docker-qs ├── Dockerfile ├── Dockerfile-build-from-local-sources ├── Makefile ├── README.md ├── awscloudbuild.yaml ├── cloudbuild.yaml ├── cloudrun.yaml ├── requirements.txt ├── run.sh └── test-cloudbuild.yaml ├── docs └── README-fasta-split.md ├── lambda-janitor ├── .gitignore ├── Makefile ├── README.md ├── janitor-test-stack.yaml ├── lambda_elb.py ├── requirements-for-testing.txt ├── requirements.txt └── trust-policy.json ├── pex-cloudbuild.yaml ├── requirements ├── base.txt └── test.txt ├── setup.cfg ├── setup.cfg_cloud ├── setup.py ├── share └── etc │ ├── elastic-blast-aws-iam-policy.json.template │ ├── elb-aws-blastn-nt-8-nodes.ini │ ├── elb-blastp-nr.ini │ └── yamllint-config.yaml ├── src ├── .gitignore └── elastic_blast │ ├── __init__.py │ ├── aws.py │ ├── aws_traits.py │ ├── base.py │ ├── commands │ ├── __init__.py │ ├── delete.py │ ├── run_summary.py │ ├── status.py │ └── submit.py │ ├── config.py │ ├── constants.py │ ├── db_metadata.py │ ├── elasticblast.py │ ├── elasticblast_factory.py │ ├── elb_config.py │ ├── filehelper.py │ ├── gcp.py │ ├── gcp_traits.py │ ├── janitor.py │ ├── jobs.py │ ├── kubernetes.py │ ├── object_storage_utils.py │ ├── resources │ ├── __init__.py │ └── quotas │ │ ├── __init__.py │ │ ├── quota_aws_batch.py │ │ ├── quota_aws_ec2_cf.py │ │ └── quota_check.py │ ├── split.py │ ├── subst.py │ ├── taxonomy.py │ ├── templates │ ├── blast-batch-job-local-ssd.yaml.template │ ├── blast-batch-job.yaml.template │ ├── cloudformation-admin-iam.yaml │ ├── elastic-blast-cf.yaml │ ├── elastic-blast-janitor-cf.yaml │ ├── elb-janitor-cronjob.yaml.template │ 
├── elb-janitor-rbac.yaml │ ├── job-cloud-split-local-ssd.yaml.template │ ├── job-init-local-ssd.yaml.template │ ├── job-init-pv.yaml.template │ ├── job-submit-jobs.yaml.template │ ├── pvc-rom.yaml.template │ ├── pvc-rwo.yaml.template │ ├── storage-gcp-ssd.yaml │ ├── storage-gcp.yaml │ ├── volume-snapshot-class.yaml │ └── volume-snapshot.yaml │ ├── tuner.py │ └── util.py ├── tests ├── __init__.py ├── app │ ├── __init__.py │ ├── data │ │ ├── bad_bucket_conf.ini │ │ ├── blastdb-notfound.ini │ │ ├── cleanup-error.ini │ │ ├── cluster-error.ini │ │ ├── good_conf.ini │ │ ├── incomplete-mem-limit-optimal-aws-machine-type.ini │ │ ├── invalid-blast-opt-no-closing-quote.ini │ │ ├── invalid-cpu-req-gcp.ini │ │ ├── invalid-dollar-sign-char.ini │ │ ├── invalid-machine-type-aws.ini │ │ ├── invalid-machine-type-gcp.ini │ │ ├── invalid-mem-req.ini │ │ ├── no-num-cpus-optimal-aws-machine-type.ini │ │ ├── query.fa │ │ └── too-many-k8s-jobs.ini │ ├── elastic_blast_app.py │ ├── gcloud │ ├── pytest.ini │ └── test_elasticblast.py ├── aws │ ├── __init__.py │ ├── data │ │ ├── aws-no-security-group.ini │ │ ├── aws-no-subnets.ini │ │ ├── aws-wrong-custom-db.ini │ │ └── aws-wrong-provider-custom-db.ini │ ├── pytest.ini │ └── test_aws.py ├── aws_traits │ ├── __init__.py │ ├── pytest.ini │ └── test_aws_traits.py ├── base │ ├── __init__.py │ ├── pytest.ini │ └── test_base.py ├── blastdb │ ├── testdb.pdb │ ├── testdb.phr │ ├── testdb.pin │ ├── testdb.pog │ ├── testdb.pos │ ├── testdb.pot │ ├── testdb.psq │ ├── testdb.ptf │ └── testdb.pto ├── config │ ├── __init__.py │ ├── data │ │ ├── aws-defaults-cluster-name.ini │ │ ├── aws-defaults.ini │ │ ├── correct-cfg-file.ini │ │ ├── corrupt-cfg-file.ini │ │ ├── elb-aws-blastn-pdbnt.ini │ │ ├── gcp-defaults.ini │ │ ├── incomplete-gcp-vpc-cfg-file.ini │ │ ├── instance-too-small-aws.ini │ │ ├── instance-too-small-gcp.ini │ │ ├── invalid-parameters.ini │ │ ├── mem-limit-too-high.ini │ │ ├── minimal-cfg-file.ini │ │ ├── missing-required-parameters.ini │ │ ├── multiple-query-files.ini │ │ └── optional-cfg-file.ini │ ├── pytest.ini │ └── test_config.py ├── cost │ ├── __init__.py │ ├── data │ │ └── aws-run-summary.json │ └── pytest.ini ├── db_metadata │ ├── __init__.py │ ├── pytest.ini │ └── test_db_metadata.py ├── elb_config │ ├── __init__.py │ ├── pytest.ini │ └── test_elb_config.py ├── fasta_split │ ├── __init__.py │ ├── job-batch.yaml.template │ ├── performance-test.sh │ ├── pytest.ini │ ├── test_fasta_split.py │ └── testdata │ │ ├── actually_gzipped_fasta.fa │ │ ├── actually_not_tarred_fasta.fa.tar │ │ ├── actually_not_zipped_fasta.fa.gz │ │ ├── e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa.gz │ │ ├── empty_file.fa │ │ ├── fasta.fa.gz │ │ └── test_template ├── filehelper │ ├── __init__.py │ ├── data │ │ └── test.tar │ ├── pytest.ini │ ├── test_aws_filesystem.py │ ├── test_filesystem_checks.py │ └── test_local_access.py ├── gcp │ ├── __init__.py │ ├── data │ │ └── test-cfg-file.ini │ ├── pytest.ini │ ├── test_cluster_api.py │ └── test_gcp.py ├── gcp_traits │ ├── __init__.py │ ├── pytest.ini │ └── test_gcp_traits.py ├── integration-test-for-failure-with-code.sh ├── integration-test-for-job-failure.sh ├── integration-test.sh ├── jobs │ ├── __init__.py │ ├── pytest.ini │ └── test_jobs.py ├── kubernetes │ ├── __init__.py │ ├── data │ │ ├── initialize_persistent_disk.ini │ │ ├── job-status-failed.json │ │ └── job-status.json │ ├── pytest.ini │ ├── test-job-init-pv.yaml │ ├── test-pvc.yaml │ ├── test-storage-gcp.yaml │ └── test_kubernetes.py ├── resources │ ├── __init__.py │ └── quotas │ │ ├── 
__init__.py │ │ ├── pytest.ini │ │ └── test_quotas.py ├── run-summary │ ├── data │ │ ├── aws-output-sample-aggregate.log │ │ ├── aws-output-sample-failed-aggregate.log │ │ ├── run_summary_sample.json │ │ └── run_summary_sample_failed.json │ ├── pytest.ini │ └── test_run_summary.py ├── run_summary_correctness_test.py ├── split │ ├── __init__.py │ ├── pytest.ini │ └── test_split.py ├── status │ ├── __init__.py │ ├── data │ │ └── status-test.ini │ ├── pytest.ini │ └── test_status.py ├── submit │ ├── __init__.py │ ├── data │ │ ├── blastdb-notfound.ini │ │ └── elb-blastn-neg-taxidfiltering.ini │ ├── pytest.ini │ └── test_submit.py ├── subst │ ├── __init__.py │ ├── pytest.ini │ └── test_subst.py ├── taxonomy │ ├── __init__.py │ ├── pytest.ini │ └── test_taxonomy.py ├── tc-bash-runner.sh ├── test-resubmission-to-same-results-bucket.sh ├── tuner │ ├── __init__.py │ ├── pytest.ini │ └── test_tuner.py ├── util │ ├── __init__.py │ ├── pytest.ini │ └── test_util.py └── utils.py └── tox.ini /.github/workflows/build-multi-pex.yml: -------------------------------------------------------------------------------- 1 | name: Build elastic-blast PEX files 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build-single: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.9"] 11 | 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | with: 16 | fetch-depth: 0 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Build PEX file 22 | run: | 23 | pip3 install wheel glob2 24 | pip3 install -r requirements/test.txt 25 | pex --python-shebang='/usr/bin/env python3' --disable-cache . -r requirements/base.txt --python=python${{ matrix.python-version }} -c elastic-blast -o elastic-blast 26 | ./elastic-blast --version 27 | ls -l elastic-blast 28 | md5sum elastic-blast > elastic-blast.md5 29 | - name: Create tarball 30 | run: tar -czvf elastic-blast-no-suffix.tar.gz elastic-blast elastic-blast.md5 31 | - name: Produce downloadable artifact 32 | uses: actions/upload-artifact@v4 33 | with: 34 | name: elastic-blast-no-suffix 35 | path: elastic-blast-no-suffix.tar.gz 36 | retention-days: 1 37 | 38 | build-multiple: 39 | runs-on: ubuntu-latest 40 | strategy: 41 | matrix: 42 | python-version: ["3.9", "3.10", "3.11", "3.13"] 43 | 44 | steps: 45 | 46 | - name: Checkout 47 | uses: actions/checkout@v3 48 | with: 49 | fetch-depth: 0 50 | - name: Set up Python ${{ matrix.python-version }} 51 | uses: actions/setup-python@v4 52 | with: 53 | python-version: ${{ matrix.python-version }} 54 | - name: Build PEX file 55 | run: | 56 | pip3 install wheel glob2 57 | pip3 install -r requirements/test.txt 58 | pex --python-shebang='/usr/bin/env python3' --disable-cache . 
-r requirements/base.txt --python=python${{ matrix.python-version }} -c elastic-blast -o elastic-blast${{ matrix.python-version }} 59 | ./elastic-blast${{ matrix.python-version }} --version 60 | ls -l ./elastic-blast${{ matrix.python-version }} 61 | md5sum elastic-blast${{ matrix.python-version }} > elastic-blast${{ matrix.python-version }}.md5 62 | - name: Create tarball 63 | run: tar -czvf elastic-blast${{ matrix.python-version }}.tar.gz elastic-blast${{ matrix.python-version }} elastic-blast${{ matrix.python-version }}.md5 64 | - name: Produce downloadable artifact 65 | uses: actions/upload-artifact@v4 66 | with: 67 | name: elastic-blast-${{ matrix.python-version }} 68 | path: elastic-blast${{ matrix.python-version }}.tar.gz 69 | retention-days: 1 70 | 71 | produce-archive: 72 | needs: [build-single, build-multiple] 73 | runs-on: ubuntu-latest 74 | steps: 75 | - uses: actions/checkout@v3 76 | - name: Download artifacts 77 | uses: actions/download-artifact@v4 78 | - name: Display structure of downloaded files 79 | run: ls -lR 80 | - name: Create tarball 81 | run: | 82 | tar axvf elastic-blast-no-suffix/elastic-blast-no-suffix.tar.gz 83 | tar axvf elastic-blast-3.9/elastic-blast3.9.tar.gz 84 | tar axvf elastic-blast-3.10/elastic-blast3.10.tar.gz 85 | tar axvf elastic-blast-3.11/elastic-blast3.11.tar.gz 86 | tar axvf elastic-blast-3.13/elastic-blast3.13.tar.gz 87 | rm -fvr elastic-blast-*.tar.gz 88 | tar -czvf ~/elastic-blast.tar.gz elastic-blast elastic-blast*md5 elastic-blast3.* 89 | - name: 'Upload Artifact' 90 | uses: actions/upload-artifact@v4 91 | with: 92 | name: elastic-blast 93 | path: ~/elastic-blast.tar.gz 94 | 95 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | pvc.yaml 3 | job-copy-queries.yaml 4 | job-init-pv.yaml 5 | blast-batch-*.yaml 6 | *.out 7 | .vscode/ 8 | kubectl 9 | *.asn 10 | __pycache__ 11 | .env 12 | .env-test 13 | .eggs 14 | *.egg-info 15 | dist 16 | build 17 | MANIFEST 18 | elastic-blast.log 19 | elb-run-summary.json 20 | .mypy_cache 21 | .pytest_cache 22 | *.tgz 23 | *.tar.gz 24 | total_count 25 | /jobs 26 | /*.fa 27 | /*.fna 28 | /*.fsa 29 | .tox/ 30 | htmlcov/ 31 | .coverage* 32 | /elb-cost 33 | /blast-tuner 34 | tags 35 | cscope.* 36 | /elastic-blast 37 | elastic-blast3.* 38 | kubeconfig.yaml 39 | creds.sh 40 | /*logs 41 | iam-policy.json 42 | clouseau* 43 | submit-and-wait-for-results.sh 44 | aws-docker-login.txt 45 | elb-run-report.csv 46 | aws-credentials 47 | .elb-venv 48 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | message: "If you use this software, please cite it using these metadata." 3 | title: ElasticBLAST 4 | version: "1.4.0" 5 | date-released: 2025-03-17 6 | license: "NCBI Public Domain" 7 | repository-code: "https://github.com/ncbi/elastic-blast/" 8 | url: "https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/" 9 | authors: 10 | - family-names: Camacho 11 | given-names: Christiam E. 
12 | orcid: https://orcid.org/0000-0002-6709-8298 13 | - family-names: Boratyn 14 | given-names: Greg 15 | - family-names: Joukov 16 | given-names: Victor 17 | orcid: https://orcid.org/0000-0003-2962-8902 18 | - family-names: Merezhuk 19 | given-names: Yuri 20 | - family-names: Madden 21 | given-names: Thomas 22 | orcid: https://orcid.org/0000-0003-1641-7321 23 | preferred-citation: 24 | type: article 25 | authors: 26 | - family-names: Camacho 27 | given-names: Christiam 28 | orcid: https://orcid.org/0000-0002-6709-8298 29 | - family-names: Boratyn 30 | given-names: Grzegorz M 31 | - family-names: Joukov 32 | given-names: Victor 33 | orcid: https://orcid.org/0000-0003-2962-8902 34 | - family-names: Vera Alvarez 35 | given-names: Roberto 36 | - family-names: Madden 37 | given-names: Thomas L 38 | orcid: https://orcid.org/0000-0003-1641-7321 39 | doi: 10.1186/s12859-023-05245-9 40 | journal: BMC Bioinformatics 41 | month: 3 42 | start: 117 43 | title: "ElasticBLAST: accelerating sequence search via cloud computing" 44 | issue: 1 45 | volume: 24 46 | year: 2023 47 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | Although all reasonable efforts have been taken to ensure the accuracy 2 | and reliability of the software and data, the NLM and the U.S. 3 | Government do not and cannot warrant the performance or results that 4 | may be obtained by using this software or data. The NLM and the U.S. 5 | Government disclaim all warranties, express or implied, including 6 | warranties of performance, merchantability or fitness for any 7 | particular purpose. 8 | 9 | Users of ElasticBLAST are solely responsible for any and all cloud service 10 | provider charges associated with their use of ElasticBLAST.  11 | 12 | See also: [LICENSE](LICENSE.md) 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | PUBLIC DOMAIN NOTICE 2 | National Center for Biotechnology Information 3 | 4 | This software is a "United States Government Work" under the 5 | terms of the United States Copyright Act. It was written as part of 6 | the authors' official duties as United States Government employees and 7 | thus cannot be copyrighted. This software is freely available 8 | to the public for use. The National Library of Medicine and the U.S. 9 | Government have not placed any restriction on its use or reproduction. 10 | 11 | Although all reasonable efforts have been taken to ensure the accuracy 12 | and reliability of the software and data, the NLM and the U.S. 13 | Government do not and cannot warrant the performance or results that 14 | may be obtained by using this software or data. The NLM and the U.S. 15 | Government disclaim all warranties, express or implied, including 16 | warranties of performance, merchantability or fitness for any particular 17 | purpose. 18 | 19 | Please cite NCBI in any work or product based on this material. 
20 | -------------------------------------------------------------------------------- /Makefile-create-blastdb-metadata: -------------------------------------------------------------------------------- 1 | # Makefile to test create-blastdb-metadata.py script 2 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 3 | # Created: Tue 15 Jun 2021 11:01:20 AM EDT 4 | 5 | SHELL=/bin/bash 6 | .PHONY: all clean check check_python 7 | 8 | ######################################################################### 9 | # Python support 10 | 11 | all: 12 | bin/create-blastdb-metadata.py --help 13 | bin/create-blastdb-metadata.py --version 14 | 15 | check: check_python 16 | ${RM} -f testdb-prot-metadata.json 17 | bin/create-blastdb-metadata.py --db tests/blastdb/testdb --dbtype prot 18 | jq -Mr '.' testdb-prot-metadata.json 19 | ${RM} -f testdb-prot-metadata.json 20 | 21 | check_python: 22 | python3 -m py_compile bin/create-blastdb-metadata.py 23 | python3 -m unittest bin/create-blastdb-metadata.py 24 | 25 | clean: 26 | find . -name __pycache__ | xargs ${RM} -fr 27 | ${RM} -f testdb-prot-metadata.json 28 | -------------------------------------------------------------------------------- /Makefile-gcp-elb-janitor: -------------------------------------------------------------------------------- 1 | # Makefile to facilitate management/testing of the ElasticBLAST janitor cronjob 2 | 3 | list: 4 | kubectl get cj 5 | kubectl describe cj 6 | -kubectl get clusterrolebinding | grep janitor 7 | 8 | logs: 9 | for job in `kubectl get jobs -o NAME | grep janitor`; do \ 10 | j=`basename $$job`; \ 11 | echo "************ Cronjob $$j ******************"; \ 12 | pod=`kubectl get pods --selector=job-name=$$j -o NAME | grep -v NAME`; \ 13 | echo $$pod; \ 14 | kubectl logs $$pod; \ 15 | done 16 | 17 | init: elb-janitor.yaml 18 | kubectl apply -f src/elastic_blast/templates/elb-janitor-rbac.yaml 19 | kubectl apply -f elb-janitor.yaml 20 | 21 | clean: 22 | kubectl delete `kubectl get cj -o NAME` 23 | kubectl delete -f src/elastic_blast/templates/elb-janitor-rbac.yaml 24 | ${RM} elb-janitor.yaml 25 | 26 | ELB_GCP_REGION?=us-east4 27 | ELB_GCP_ZONE?=us-east4-b 28 | ELB_GCP_PROJECT?=ncbi-sandbox-blast 29 | ELB_RESULTS?=gs://elasticblast-${USER}/auto-shutdown-6 30 | ELB_CLUSTER_NAME?=`make -s results2clustername ELB_RESULTS=${ELB_RESULTS}` 31 | 32 | elb-janitor.yaml: src/elastic_blast/templates/elb-janitor-cronjob.yaml.template 33 | ELB_RESULTS=${ELB_RESULTS} \ 34 | ELB_GCP_PROJECT=${ELB_GCP_PROJECT} \ 35 | ELB_GCP_REGION=${ELB_GCP_REGION} \ 36 | ELB_GCP_ZONE=${ELB_GCP_ZONE} \ 37 | ELB_CLUSTER_NAME=${ELB_CLUSTER_NAME} \ 38 | envsubst '$$ELB_RESULTS $$ELB_GCP_PROJECT $$ELB_GCP_ZONE $$ELB_GCP_REGION $$ELB_CLUSTER_NAME' < $< > $@ 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ElasticBLAST 2 | ============ 3 | 4 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/elastic-blast/badges/version.svg)](https://anaconda.org/bioconda/elastic-blast) 5 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/elastic-blast/badges/latest_release_date.svg)](https://anaconda.org/bioconda/elastic-blast) 6 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/elastic-blast/badges/downloads.svg)](https://anaconda.org/bioconda/elastic-blast) 7 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/elastic-blast/badges/installer/conda.svg)](https://conda.anaconda.org/bioconda) 8 | 9 | [![PyPI 
version](https://badge.fury.io/py/elastic-blast.svg)](https://badge.fury.io/py/elastic-blast) 10 | 11 | ElasticBLAST is a cloud-based tool to perform your BLAST searches faster and make you more effective. 12 | 13 | ElasticBLAST is ideal for users who have a large number (thousands or more) of queries to BLAST or who prefer to use cloud infrastructure for their searches. It can run BLAST searches that cannot be done on [NCBI WebBLAST](https://blast.ncbi.nlm.nih.gov) and runs them more quickly than stand-alone [BLAST+](https://www.ncbi.nlm.nih.gov/books/NBK279690/). 14 | 15 | ElasticBLAST speeds up your work by distributing your BLAST+ searches across multiple cloud instances. The ability to scale resources in this way allows larger numbers of queries to be searched in a shorter time than you could with BLAST+ on a single host. 16 | 17 | The National Center for Biotechnology Information ([NCBI](https://www.ncbi.nlm.nih.gov)), part of the National Library of 18 | Medicine at the NIH, developed and maintains ElasticBLAST. 19 | 20 | The NCBI is making the source code for ElasticBLAST available on GitHub as an 21 | Open Distribution to allow the user community to easily obtain and examine 22 | that code. GitHub also provides a means for users to report issues and 23 | suggest modifications through pull requests. 24 | 25 | The NCBI will use internal source code control as the repository of record and 26 | push regular releases of the ElasticBLAST 27 | source code to GitHub. The BLAST developers will work to ensure that 28 | ElasticBLAST continues to function in 29 | changing environments and, when possible, integrate user feedback into 30 | ElasticBLAST. Owing to resource constraints, 31 | they cannot absolutely commit to act on all issue reports, except critical 32 | security vulnerabilities. 33 | 34 | End-user documentation 35 | ---------------------- 36 | 37 | Please visit https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/ 38 | 39 | How to get ElasticBLAST 40 | ----------------------- 41 | 42 | There are several ways to obtain ElasticBLAST, please select the one that is 43 | most suitable to you: 44 | 45 | * [Installation from PyPI.org][1] 46 | * [Installation from BioConda][2] 47 | * [Installation for the AWS Cloud Shell][3] 48 | * [Installation for the GCP Cloud Shell][4] 49 | 50 | 51 | Publication: 52 | ------------ 53 | 54 | Camacho C, Boratyn GM, Joukov V, Vera Alvarez R, Madden TL. ElasticBLAST: accelerating sequence search via cloud computing. BMC Bioinformatics. 2023 Mar 26;24(1):117. doi: [10.1186/s12859-023-05245-9](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-023-05245-9). PMID: [36967390](https://pubmed.ncbi.nlm.nih.gov/36967390/); PMCID: [PMC10040096](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10040096/). 55 | 56 | 57 | Developer information 58 | --------------------- 59 | 60 | ### How to build ElasticBLAST 61 | 62 | make elastic-blast 63 | 64 | ### Requirements for building ElasticBLAST 65 | 66 | In addition to the requirements listed in the [documentation][5], the [AWS Command Line Interface][6] is required. 
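For reference, the CI workflow in `.github/workflows/build-multi-pex.yml` (included in this repository) builds the self-contained `elastic-blast` executable with `pex`. A minimal sketch of the equivalent manual steps, assuming a checkout of this repository and Python 3.9 or later, is:

    pip3 install wheel glob2
    pip3 install -r requirements/test.txt
    pex --python-shebang='/usr/bin/env python3' --disable-cache . \
        -r requirements/base.txt -c elastic-blast -o elastic-blast
    ./elastic-blast --version

The `make elastic-blast` target automates steps along these lines; consult the `Makefile` for the authoritative recipe.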
67 | 68 | 69 | [1]: https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/tutorials/pypi-install.html#tutorial-pypi 70 | [2]: https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/tutorials/conda-install.html#tutorial-conda 71 | [3]: https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/quickstart-aws.html#get-elasticblast 72 | [4]: https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/quickstart-gcp.html#get-elasticblast 73 | [5]: https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/requirements.html 74 | [6]: https://aws.amazon.com/cli/ 75 | -------------------------------------------------------------------------------- /bin/aws-create-elastic-blast-janitor-role.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # aws-create-elastic-blast-janitor-role.sh: Create and tag role for running 3 | # ElasticBLAST janitor on AWS 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Mon Nov 29 17:29:31 EST 2021 7 | 8 | set -xeuo pipefail 9 | shopt -s nullglob 10 | 11 | ROLE_PATH=/app/ncbi/elasticblast/ 12 | ROLE_NAME=ncbi-elasticblast-janitor-role 13 | 14 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 15 | trap " /bin/rm -fr $TMP " INT QUIT EXIT HUP KILL ALRM 16 | 17 | aws iam list-roles --path-prefix ${ROLE_PATH} --output text | tee $TMP 18 | [ -s $TMP ] && { echo "Role $ROLE_PATH/$ROLE_NAME exists, exiting"; exit 0 ; } 19 | 20 | # Create the trust policy file 21 | cat >$TMP<&/dev/null || exit 0 14 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AWSLambda_FullAccess 15 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonVPCReadOnlyAccess 16 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess 17 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AWSBatchFullAccess 18 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonEC2FullAccess 19 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/IAMFullAccess 20 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AWSCloudFormationFullAccess 21 | aws iam detach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/CloudWatchEventsFullAccess 22 | aws iam delete-role --role-name ${ROLE_NAME} 23 | -------------------------------------------------------------------------------- /bin/aws-describe-elastic-blast-janitor-role.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # aws-describe-elastic-blast-janitor.sh: Describe the role to run the 3 | # ElasticBLAST janitor on AWS 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Tue Nov 30 08:09:03 EST 2021 7 | 8 | set -xuo pipefail 9 | shopt -s nullglob 10 | 11 | ROLE_NAME=ncbi-elasticblast-janitor-role 12 | aws iam get-role --role-name $ROLE_NAME --output json 13 | -------------------------------------------------------------------------------- /bin/aws-get-auto-scaling-events.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # aws-get-auto-scaling-events.sh: Get autoscaling events for ElasticBLAST's 3 | # AWS Batch compute environment 4 | # 5 | # Author: Greg Boratyn (borayng@ncbi.nlm.nih.gov) 6 | # Created: Fri Aug 12 17:28:20 EDT 2022 7 | 8 | # The script assumes that elastic-blast.log file exists 
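# Usage: aws-get-auto-scaling-events.sh [LOGFILE]
# The optional first argument overrides the default log file (elastic-blast.log);
# the log is expected to contain the ComputeEnvName recorded by an ElasticBLAST
# submission, which is used to look up the matching auto scaling group.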
9 | logfile=${1:-elastic-blast.log} 10 | COMP_ENV_NAME=$(grep ComputeEnvName $logfile | tr '/' '\t' | cut -f 2 | tail -n 1) 11 | if [ ! -z "${COMP_ENV_NAME}" ] ; then 12 | AUTO_SCALE_GRP_NAME=$(aws autoscaling describe-auto-scaling-groups --output json | jq -Mr '.AutoScalingGroups[].AutoScalingGroupName' | grep $COMP_ENV_NAME) 13 | if [ $? -eq 0 ] ; then 14 | aws autoscaling describe-scaling-activities --auto-scaling-group-name $AUTO_SCALE_GRP_NAME 15 | else 16 | echo "Failed to find an AWS auto scaling group for the AWS Batch Compute environment $COMP_ENV_NAME" 17 | exit 1 18 | fi 19 | else 20 | echo "Failed to find an AWS Batch Compute environment in $logfile" 21 | exit 1 22 | fi 23 | -------------------------------------------------------------------------------- /bin/aws-show-my-undeleted-searches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # aws-show-my-undeleted-searches.sh: This script shows my undeleted searches in 3 | # AWS and their status 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Sat 07 Aug 2021 11:01:46 AM EDT 7 | 8 | set -euo pipefail 9 | shopt -s nullglob 10 | 11 | username=`whoami` 12 | verbose=0 13 | 14 | command -v elastic-blast >&/dev/null || { echo "elastic-blast must be in your PATH for this script to work"; exit 1; } 15 | 16 | usage() { 17 | echo -e "$0 [-h] [-u USERNAME] [-v]\n" 18 | echo -e "This script shows ElasticBLAST searches that have not been deleted on AWS and their statusn" 19 | echo -e "Options:" 20 | echo -e "\t-u USERNAME: Show ElasticBLAST searches for user USERNAME (default: $username)" 21 | echo -e "\t-v: Show verbose output, i.e.: displays your AWS user identity" 22 | echo -e "\t-h: Show this message" 23 | } 24 | 25 | check_status() { 26 | results=$1 27 | created=$2 28 | status_file=$3 29 | now=$(date -u +"%s") 30 | SECONDS_IN_A_DAY=$((24*60*60)) 31 | if egrep -q '^Your ElasticBLAST search succeeded,|^Pending 0' $status_file; then 32 | case `uname` in 33 | Linux) 34 | created_date=$(date -d "$created" +"%s") 35 | ;; 36 | Darwin) 37 | created_date=$(date -j -f "%F %T" "$created" +"%s") 38 | ;; 39 | esac 40 | if [ $(($now - $created_date)) -gt $SECONDS_IN_A_DAY ]; then 41 | echo "Please run 'elastic-blast delete --results $results'" 42 | fi 43 | fi 44 | } 45 | 46 | while getopts "u:vh" OPT; do 47 | case $OPT in 48 | u) username=${OPTARG} 49 | ;; 50 | v) verbose=1 51 | ;; 52 | h) usage 53 | exit 0 54 | ;; 55 | esac 56 | done 57 | 58 | # User name for label computed as in elastic_blast.elb_config.create_labels 59 | user=$(echo $username | tr '[A-Z-]' '[a-z_]' | tr '.' 
'-' | cut -b-62) 60 | 61 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 62 | STATUS=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 63 | trap " /bin/rm -fr $TMP $STATUS" INT QUIT EXIT HUP KILL ALRM 64 | 65 | if [ $verbose -eq 1 ]; then 66 | echo -n "AWS user identity: "; 67 | aws sts get-caller-identity --output json | jq -Mr .Arn 68 | fi 69 | 70 | aws batch describe-compute-environments --output json | \ 71 | jq -Mr ".computeEnvironments[] | select(.tags.creator==\"$user\") | [ .tags.results, .tags.created ] | @tsv" > $TMP 72 | 73 | [ -s $TMP ] && echo "These are your ElasticBLAST searches on AWS that have not been deleted" 74 | 75 | while read -r results c; do 76 | created=$(echo $c | sed 's/-/ /3;s/-/:/4;s/-/:/3') 77 | echo "##### Results bucket: $results" 78 | echo "##### Created: $created UTC" 79 | echo "##### Status:" 80 | elastic-blast status --results $results | tee $STATUS 81 | check_status $results "$created" $STATUS 82 | done < $TMP 83 | echo 84 | 85 | # Show also those CloudFormation stacks that failed to delete 86 | aws cloudformation describe-stacks --output json | \ 87 | jq -Mr ".Stacks[] | select( (.StackName|contains(\"$user\")) and (.StackStatus|contains(\"DELETE\")) ) | [ (.Tags[] | select(.Key==\"results\") | .Value), (.Tags[] | select(.Key==\"created\") | .Value), .StackStatus ] | @tsv" > $TMP 88 | [ ! -s $TMP ] && exit 89 | 90 | echo "These are your failed CloudFormation stacks, please be sure to delete them with the commands listed below" 91 | 92 | while read -r results c status; do 93 | created=$(echo $c | sed 's/-/ /3;s/-/:/4;s/-/:/3') 94 | echo "##### Results bucket: $results" 95 | echo "##### Created: $created UTC" 96 | echo "##### Status: $status" 97 | echo elastic-blast delete --results $results 98 | done < $TMP 99 | -------------------------------------------------------------------------------- /bin/gcp-setup-elastic-blast-janitor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bin/gcp-setup-elastic-blast-janitor.sh: Script to set up the ElasticBLAST 3 | # janitor permissions in GCP 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Tue 08 Mar 2022 04:53:15 PM EST 7 | 8 | set -euo pipefail 9 | shopt -s nullglob 10 | 11 | command -v gcloud >&/dev/null || { echo "gcloud must be in your PATH for this script to work"; exit 1; } 12 | 13 | user=$(gcloud config get-value account) 14 | prj=$(gcloud config get-value project) 15 | 16 | usage() { 17 | echo -e "$0 [-h] [-u USERNAME] [-p GCP_PROJECT_ID]\n" 18 | echo -e "This script sets up the permissions to the ElasticBLAST janitor in GCP" 19 | echo -e "Options:" 20 | echo -e "\t-u USERNAME: GCP user, group or service account to configure (default: user:$user)" 21 | echo -e "\t\tFor specific format, please see https://cloud.google.com/sdk/gcloud/reference/projects/add-iam-policy-binding#--member" 22 | echo -e "\t-p GCP_PROJECT_ID: GCP project ID (default: ${prj})" 23 | echo -e "\t\tDocumentation: https://cloud.google.com/sdk/gcloud/reference/projects/add-iam-policy-binding#PROJECT_ID" 24 | echo -e "\t-h: Show this message" 25 | } 26 | 27 | while getopts "u:p:h" OPT; do 28 | case $OPT in 29 | u) user=${OPTARG} 30 | ;; 31 | p) prj=${OPTARG} 32 | ;; 33 | h) usage 34 | exit 0 35 | ;; 36 | esac 37 | done 38 | 39 | gcloud projects add-iam-policy-binding ${prj} --member=user:${user} --role=roles/container.admin 40 | -------------------------------------------------------------------------------- /bin/gcp-show-my-undeleted-searches.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # gcp-show-my-undeleted-searches.sh: This script shows my undeleted searches in 3 | # GCP and their status 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Tue 17 Aug 2021 09:57:27 PM EDT 7 | 8 | set -o pipefail 9 | shopt -s nullglob 10 | 11 | username=`whoami` 12 | verbose=0 13 | 14 | command -v elastic-blast >&/dev/null || { echo "elastic-blast must be in your PATH for this script to work"; exit 1; } 15 | 16 | usage() { 17 | echo -e "$0 [-h] [-u USERNAME] [-v]\n" 18 | echo -e "This script shows ElasticBLAST searches that have not been deleted on GCP and their status\n" 19 | echo -e "Options:" 20 | echo -e "\t-u USERNAME: Show ElasticBLAST searches for user USERNAME (default: $username)" 21 | echo -e "\t-v: Show verbose output, i.e.: displays your GCP configuration settings" 22 | echo -e "\t-h: Show this message" 23 | } 24 | 25 | check_status() { 26 | results=$1 27 | created=$2 28 | status_file=$3 29 | now=$(date -u +"%s") 30 | SECONDS_IN_A_DAY=$((24*60*60)) 31 | if egrep -q '^Your ElasticBLAST search succeeded,' $status_file; then 32 | case `uname` in 33 | Linux) 34 | created_date=$(date -d "$created" +"%s") 35 | ;; 36 | Darwin) 37 | created_date=$(date -j -f "%F %T" "$created" +"%s") 38 | ;; 39 | esac 40 | if [ $(($now - $created_date)) -gt $SECONDS_IN_A_DAY ]; then 41 | echo "Please run 'elastic-blast delete --results $results --gcp-project $ELB_GCP_PROJECT --gcp-region $ELB_GCP_REGION --gcp-zone $ELB_GCP_ZONE'" 42 | fi 43 | fi 44 | } 45 | 46 | while getopts "u:vh" OPT; do 47 | case $OPT in 48 | u) username=${OPTARG} 49 | ;; 50 | v) verbose=1 51 | ;; 52 | h) usage 53 | exit 0 54 | ;; 55 | esac 56 | done 57 | 58 | if [ -z "${ELB_GCP_PROJECT}" ]; then 59 | export ELB_GCP_PROJECT=`gcloud config get-value core/project` 60 | fi 61 | if [ -z "${ELB_GCP_REGION}" ]; then 62 | export ELB_GCP_REGION=`gcloud config get-value compute/region` 63 | fi 64 | if [ -z "${ELB_GCP_ZONE}" ]; then 65 | export ELB_GCP_ZONE=`gcloud config get-value compute/zone` 66 | fi 67 | 68 | if [ $verbose -eq 1 ]; then 69 | echo -n "Account: " ; gcloud config get-value core/account 70 | echo "Project: $ELB_GCP_PROJECT" 71 | echo "Region: $ELB_GCP_REGION" 72 | echo "Zone: $ELB_GCP_ZONE" 73 | fi 74 | 75 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 76 | STATUS=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 77 | trap " /bin/rm -fr $TMP $STATUS" INT QUIT EXIT HUP KILL ALRM 78 | 79 | # User name for label computed as in elastic_blast.elb_config.create_labels 80 | user=$(echo $username | tr '[A-Z-]' '[a-z_]' | tr '.' '-' | cut -b-62) 81 | gcloud container clusters list --filter=resourceLabels.owner=$user --format='value(resourceLabels.results,resourceLabels.created)' | sort > $TMP 82 | [ -s $TMP ] && { 83 | echo "These are your ElasticBLAST searches on GCP that have not been deleted"; 84 | echo "Please note that the results bucket names below been modified to remove upper case and all '/' characters following 'gs://'"; 85 | } 86 | 87 | while read -r r c; do 88 | results=$(echo $r | sed 's,---,://,') 89 | created=$(echo $c | sed 's/-/ /3;s/-/:/4;s/-/:/3') 90 | # FIXME: how to restore original results bucket name? 
91 | #if [[ "$r" =~ "elasticblast-$user" ]]; then 92 | # results=$(echo $r | sed "s,---,://,;s,$user,$user/,") 93 | #fi 94 | echo "##### Results bucket: $results" 95 | echo "##### Created: $created UTC" 96 | #echo "##### Status:" 97 | #elastic-blast status --results $results | tee $STATUS 98 | #check_status $results "$created" $STATUS 99 | done < $TMP 100 | -------------------------------------------------------------------------------- /bin/results2clustername.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # results2clustername.sh: Script to convert ElasticBLAST results to the default 3 | # cluster name 4 | # 5 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Created: Thu 08 Apr 2021 04:07:29 PM EDT 7 | 8 | if [ $# -ne 1 ] ; then 9 | echo "Usage: $0 " 10 | exit 1 11 | fi 12 | elb_results=$1 13 | md5=md5sum 14 | command -v $md5 >& /dev/null || md5=md5 15 | results_hash=$(printf $elb_results | $md5 | cut -b-9) 16 | echo elasticblast-$USER-$results_hash 17 | -------------------------------------------------------------------------------- /docker-blast/Dockerfile: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM ncbi/blast:2.16.0 as blast 22 | ARG version 23 | LABEL Description="NCBI BLAST" Vendor="NCBI/NLM/NIH" Version=${version} Maintainer=camacho@ncbi.nlm.nih.gov 24 | 25 | USER root 26 | WORKDIR /root/ 27 | 28 | COPY requirements.txt . 29 | 30 | RUN apt-get -y -m update && \ 31 | apt-get install -y python3 python3-pip time parallel vmtouch curl wget unzip && \ 32 | pip3 --version && python3 -m pip --version && \ 33 | rm -rf /var/lib/apt/lists/* 34 | 35 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 36 | python3 -m pip install --no-cache-dir -r requirements.txt && \ 37 | rm -frv requirements.txt 38 | 39 | COPY splitq_download_db_search /blast/bin/ 40 | RUN sed -i -e "s/\$VERSION/$version/" /blast/bin/splitq_download_db_search 41 | COPY fasta-split /blast/bin/ 42 | 43 | CMD ["/bin/bash"] 44 | 45 | -------------------------------------------------------------------------------- /docker-blast/Dockerfile-build-from-local-sources: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. 
It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM ncbi/blast:2.16.0 as blast 22 | ARG version 23 | LABEL Description="NCBI BLAST" Vendor="NCBI/NLM/NIH" Version=${version} Maintainer=camacho@ncbi.nlm.nih.gov 24 | 25 | FROM google/cloud-sdk:slim 26 | # FROM resets ARGs, thus repeated ARG instruction 27 | ARG version 28 | 29 | USER root 30 | WORKDIR /root/ 31 | 32 | COPY requirements.txt . 33 | RUN cat requirements.txt | grep -v elastic\-blast >tmp && mv tmp requirements.txt 34 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 35 | python3 -m pip install --no-cache-dir -r requirements.txt && \ 36 | rm -frv requirements.txt 37 | 38 | COPY src/ /blast/src/ 39 | COPY bin/ /blast/bin/ 40 | COPY requirements/ /blast/requirements/ 41 | COPY setup.py /blast/setup.py 42 | COPY setup.cfg_cloud /blast/setup.cfg 43 | 44 | WORKDIR /blast/ 45 | RUN python3 -m pip install . 46 | 47 | WORKDIR /root/ 48 | 49 | RUN apt-get -y -m update && \ 50 | apt-get install -y libgomp1 libnet-perl libidn11 libxml-simple-perl libjson-perl perl-doc liblmdb-dev time parallel vmtouch cpanminus curl wget libio-socket-ssl-perl libhtml-parser-perl unzip && \ 51 | rm -rf /var/lib/apt/lists/* 52 | 53 | RUN mkdir -p /blast/bin /blast/lib 54 | COPY --from=blast /blast/bin /blast/bin 55 | COPY --from=blast /blast/lib /blast/lib 56 | COPY --from=blast /root/edirect /root/edirect 57 | COPY splitq_download_db_search /blast/bin/ 58 | RUN sed -i -e "s/\$VERSION/$version/" /blast/bin/splitq_download_db_search 59 | COPY fasta-split /blast/bin/ 60 | 61 | 62 | RUN mkdir -p /blast/blastdb /blast/blastdb_custom 63 | RUN sed -i '$ a BLASTDB=/blast/blastdb:/blast/blastdb_custom' /etc/environment 64 | ENV BLASTDB /blast/blastdb:/blast/blastdb_custom 65 | ENV PATH="/root/edirect:/blast/bin:${PATH}" 66 | 67 | 68 | WORKDIR /blast 69 | 70 | CMD ["/bin/bash"] 71 | 72 | -------------------------------------------------------------------------------- /docker-blast/LICENSE.md: -------------------------------------------------------------------------------- 1 | PUBLIC DOMAIN NOTICE 2 | National Center for Biotechnology Information 3 | 4 | This software is a "United States Government Work" under the 5 | terms of the United States Copyright Act. It was written as part of 6 | the authors' official duties as United States Government employees and 7 | thus cannot be copyrighted. This software is freely available 8 | to the public for use. The National Library of Medicine and the U.S. 9 | Government have not placed any restriction on its use or reproduction. 10 | 11 | Although all reasonable efforts have been taken to ensure the accuracy 12 | and reliability of the software and data, the NLM and the U.S. 
13 | Government do not and cannot warrant the performance or results that 14 | may be obtained by using this software or data. The NLM and the U.S. 15 | Government disclaim all warranties, express or implied, including 16 | warranties of performance, merchantability or fitness for any particular 17 | purpose. 18 | 19 | Please cite NCBI in any work or product based on this material. 20 | -------------------------------------------------------------------------------- /docker-blast/README.md: -------------------------------------------------------------------------------- 1 | # ElasticBLAST docker image 2 | 3 | This directory contains the tools needed to build the docker image used to 4 | run BLAST in ElasticBLAST. 5 | 6 | The `Makefile` contains targets to build, test and deploy the docker image in 7 | various repositories. 8 | 9 | If you have `docker` available, run `make build` to build the image, and `make 10 | check` to test it locally. 11 | 12 | -------------------------------------------------------------------------------- /docker-blast/awscloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'docker' 3 | args: [ 'build', '-t', '${_IMG}:$TAG_NAME', '-t', '${_IMG}:latest', '-f', '${_DOCKERFILE}', '.' ] 4 | - name: 'docker' 5 | args: [ 'login', '-u', 'AWS', '-p', '${_AWS_ECR_PASSWD}', '${_SERVER}' ] 6 | - name: 'docker' 7 | args: [ 'push', '${_IMG}:$TAG_NAME' ] 8 | - name: 'docker' 9 | args: [ 'push', '${_IMG}:latest' ] 10 | 11 | substitutions: 12 | _DOCKERFILE: 'Dockerfile' 13 | -------------------------------------------------------------------------------- /docker-blast/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'docker' 3 | args: [ 'build', '-t', '${_IMG}:$TAG_NAME', '-t', '${_IMG}:latest', '-f', '${_DOCKERFILE}', '.' 
] 4 | images: ['${_IMG}:$TAG_NAME', '${_IMG}:latest'] 5 | 6 | substitutions: 7 | _DOCKERFILE: 'Dockerfile' 8 | -------------------------------------------------------------------------------- /docker-blast/requirements.txt: -------------------------------------------------------------------------------- 1 | awscli 2 | filelock 3 | boto3 4 | ec2_metadata==2.4.0 5 | requests>=2.31.0 6 | -------------------------------------------------------------------------------- /docker-blast/test-docker-image-aws.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: '${_IMG}' 3 | args: ['update_blastdb.pl', '--version'] 4 | - name: '${_IMG}' 5 | args: ['blastn', '-version-full'] 6 | - name: '${_IMG}' 7 | args: ['blastdb_path', '-version-full'] 8 | - name: '${_IMG}' 9 | args: ['which', 'vmtouch'] 10 | - name: '${_IMG}' 11 | args: ['aws', '--version'] 12 | - name: '${_IMG}' 13 | args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir'] 14 | - name: '${_IMG}' 15 | args: ['gsutil', '--version'] 16 | - name: '${_IMG}' 17 | args: ['gsutil', 'ls', 'gs://blast-db'] 18 | - name: '${_IMG}' 19 | args: ['printenv', 'BLASTDB', 'PATH'] 20 | - name: '${_IMG}' 21 | args: ['fasta-split', '--help'] 22 | - name: '${_IMG}' 23 | args: ['splitq_download_db_search', '--version'] 24 | - name: '${_IMG}' 25 | args: ['splitq_download_db_search', '--help'] 26 | -------------------------------------------------------------------------------- /docker-blast/test-docker-image-gcp.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 3 | args: ['update_blastdb.pl', '--version'] 4 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 5 | args: ['blastn', '-version-full'] 6 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 7 | args: ['blastdb_path', '-version-full'] 8 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 9 | args: ['which', 'vmtouch'] 10 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 11 | args: ['aws', '--version'] 12 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 13 | args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir'] 14 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 15 | args: ['gsutil', '--version'] 16 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 17 | args: ['gsutil', 'ls', 'gs://blast-db'] 18 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 19 | args: ['printenv', 'BLASTDB', 'PATH'] 20 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 21 | args: ['fasta-split', '--help'] 22 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 23 | args: ['splitq_download_db_search', '--version'] 24 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 25 | args: ['splitq_download_db_search', '--help'] 26 | -------------------------------------------------------------------------------- /docker-blast/test_data/Garbage_query_file.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/Garbage_query_file.fa -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nin 
-------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nnd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nnd -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nni: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nni -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nog -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nos: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nos -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.not: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.not -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.ntf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.ntf -------------------------------------------------------------------------------- /docker-blast/test_data/pdbnt.nto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/docker-blast/test_data/pdbnt.nto -------------------------------------------------------------------------------- /docker-demo/.gcloudignore: -------------------------------------------------------------------------------- 1 | .gcloudignore 2 | .gitignore 3 | .git 4 | *~ 5 | Makefile 6 | test 7 | typescript 8 | -------------------------------------------------------------------------------- /docker-demo/Dockerfile: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. 
The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM google/cloud-sdk:slim 22 | 23 | ARG version 24 | ARG username=masterblaster 25 | ARG grpname=blast 26 | 27 | LABEL Description="NCBI ElasticBLAST" 28 | LABEL Version=${version} 29 | LABEL Vendor="NCBI/NLM/NIH" 30 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 31 | 32 | USER root 33 | 34 | COPY requirements.txt . 35 | 36 | RUN apt-get -y -m update && \ 37 | apt-get install -y python3 kubectl && \ 38 | pip3 install --no-cache-dir --upgrade pip && \ 39 | pip3 install --no-cache-dir -r requirements.txt && \ 40 | rm -rf /var/lib/apt/lists/* requirements.txt 41 | 42 | RUN groupadd -r ${grpname} && \ 43 | useradd --no-log-init -r -g ${grpname} ${username} && \ 44 | mkdir -vp /home/${username} && \ 45 | chown -Rc ${username}:${grpname} /home/${username} 46 | 47 | USER ${username} 48 | WORKDIR /home/${username} 49 | 50 | COPY --chown=${username}:${grpname} Makefile-demo.mk Makefile 51 | COPY --chown=${username}:${grpname} README.md *.ini /home/${username}/ 52 | 53 | CMD ["/bin/bash"] 54 | 55 | -------------------------------------------------------------------------------- /docker-demo/Dockerfile-build-from-local-sources: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
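# This variant of the demo image installs elastic-blast from the local source
# tree (src/, bin/, requirements/, setup.py) copied in below, rather than from
# the released package on PyPI; see the gcp-build-from-local-sources target in
# the accompanying Makefile.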
20 | 21 | FROM google/cloud-sdk:slim 22 | 23 | ARG version 24 | ARG username=masterblaster 25 | ARG grpname=blast 26 | 27 | LABEL Description="NCBI ElasticBLAST" 28 | LABEL Version=${version} 29 | LABEL Vendor="NCBI/NLM/NIH" 30 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 31 | 32 | USER root 33 | 34 | RUN apt-get -y -m update && \ 35 | apt-get install -y python3 python3-distutils kubectl && \ 36 | pip3 install --no-cache-dir --upgrade pip && \ 37 | pip3 install --no-cache-dir awscli && \ 38 | rm -rf /var/lib/apt/lists/* 39 | 40 | RUN groupadd -r ${grpname} && \ 41 | useradd --no-log-init -r -g ${grpname} ${username} && \ 42 | mkdir -vp /home/${username} && \ 43 | chown -Rc ${username}:${grpname} /home/${username} 44 | 45 | USER ${username} 46 | WORKDIR /home/${username} 47 | 48 | COPY src/ /home/${username}/src/ 49 | COPY bin/ /home/${username}/bin/ 50 | COPY requirements/ /home/${username}/requirements/ 51 | COPY setup.py /home/${username}/setup.py 52 | COPY setup.cfg_cloud /home/${username}/setup.cfg 53 | USER root 54 | RUN pip3 install . -r requirements/test.txt 55 | 56 | USER ${username} 57 | 58 | COPY --chown=${username}:${grpname} Makefile-demo.mk Makefile 59 | COPY --chown=${username}:${grpname} README.md *.ini /home/${username}/ 60 | 61 | CMD ["/bin/bash"] 62 | 63 | -------------------------------------------------------------------------------- /docker-demo/Makefile: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | # Makefile for building a container to run an ElasticBLAST demo 22 | # 23 | # Author: Christiam Camacho 24 | # Created: Tue Oct 13 10:48:55 EDT 2020 25 | 26 | SHELL=/bin/bash 27 | .PHONY: all pre-check check clean build publish 28 | 29 | IMG?=ncbi/elastic-blast-demo 30 | VERSION?=$(shell git describe --abbrev=0 --tags) 31 | 32 | all: build check 33 | 34 | build: 35 | docker build --build-arg version=${VERSION} -t ${IMG}:${VERSION} . 
36 | docker tag ${IMG}:${VERSION} ${IMG}:latest 37 | 38 | publish: build 39 | docker push ${IMG}:${VERSION} 40 | docker push ${IMG}:latest 41 | 42 | clean: 43 | -docker image rm ${IMG}:${VERSION} ${IMG}:latest 44 | 45 | check: 46 | docker run --rm ${IMG}:${VERSION} gcloud version 47 | docker run --rm ${IMG}:${VERSION} aws --version 48 | docker run --rm ${IMG}:${VERSION} kubectl version --client=true 49 | docker run --rm ${IMG}:${VERSION} elastic-blast --version 50 | docker run --rm ${IMG}:${VERSION} ls -lRa 51 | 52 | TAG?=${USER} 53 | gcp-build: 54 | gcloud builds submit --config cloudbuild.yaml --substitutions _TAG=$(TAG),_VERSION=${VERSION},_IMG=${IMG} 55 | 56 | # Use this target to build an image from your local sources as opposed to those on PyPI.org 57 | # Your image will be named gcr.io/${GCP_PROJECT}/${IMG}:${USER} 58 | gcp-build-from-local-sources: 59 | rsync -a ../setup.py ../setup.cfg_cloud ../src ../bin ../requirements ${PWD}/ 60 | sed -i~ -e '/^value = $${VERSION}/d;' setup.cfg_cloud 61 | echo "value = ${VERSION}" >> setup.cfg_cloud 62 | mv Dockerfile Dockerfile~ 63 | mv Dockerfile-build-from-local-sources Dockerfile 64 | -gcloud builds submit --config cloudbuild.yaml --substitutions _TAG=$(TAG),_VERSION=${VERSION},_IMG=${IMG} 65 | rm -fr src bin requirements setup.cfg_cloud setup.py 66 | mv Dockerfile Dockerfile-build-from-local-sources 67 | mv Dockerfile~ Dockerfile 68 | 69 | GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null) 70 | gcp-check: 71 | -gcloud container images list --repository=gcr.io/${GCP_PROJECT}/${IMG} 72 | -gcloud container images list-tags gcr.io/ncbi-sandbox-blast/ncbi/elastic-blast-demo 73 | -gcloud container images describe gcr.io/ncbi-sandbox-blast/ncbi/elastic-blast-demo:latest 74 | -gcloud container images describe gcr.io/ncbi-sandbox-blast/ncbi/elastic-blast-demo:${VERSION} 75 | gcloud builds submit --config test-cloudbuild.yaml --substitutions _TAG=$(TAG),_IMG=${IMG} 76 | 77 | gcp-list-tagless-images: 78 | gcloud container images list-tags gcr.io/ncbi-sandbox-blast/ncbi/elastic-blast-demo \ 79 | --filter='-tags:*' --format="get(digest)" 80 | -------------------------------------------------------------------------------- /docker-demo/Makefile-demo.mk: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
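# Typical demo flow (illustrative ordering; each target is defined below):
#   make gcp-init       # create a uniquely named demo bucket
#   make gcp-run        # submit the ElasticBLAST search
#   make gcp-status     # check on the search
#   make gcp-results    # list and display the results
#   make gcp-delete     # tear down the ElasticBLAST cloud resources
#   make gcp-distclean  # remove the demo bucket
# The aws-* targets mirror the same flow on AWS.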
20 | 21 | # Makefile for ElasticBLAST demo 22 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 23 | # Created: Tue 06 Oct 2020 11:42:59 AM EDT 24 | 25 | SHELL=/bin/bash 26 | .PHONY: all clean distclean check 27 | 28 | ELB?=/usr/bin/elastic-blast 29 | 30 | GCP_CFG?=elb-gcp-blastn-mane-pdbnt.ini 31 | AWS_CFG?=elb-aws-blastn-mane-pdbnt.ini 32 | GCP_LOG?=elb-gcp.log 33 | AWS_LOG?=elb-aws.log 34 | 35 | creds: gcp-creds aws-creds 36 | config: gcp-config aws-config 37 | log: gcp-log aws-log 38 | run: gcp-run aws-run 39 | delete: gcp-delete aws-delete 40 | status: gcp-status aws-status 41 | 42 | bucket-suffix.txt: 43 | openssl rand -hex 3 > $@ 44 | 45 | version: 46 | ${ELB} --version 47 | 48 | gcp-init: bucket-suffix.txt 49 | sed -i~ -e 's/BUCKET_SUFFIX/$(shell cat $<)/' elb-*.ini 50 | gsutil mb gs://elasticblast-demo-$(shell cat $<) 51 | 52 | gcp-run: 53 | ${ELB} submit --cfg ${GCP_CFG} --logfile ${GCP_LOG} --loglevel DEBUG 54 | 55 | gcp-results: 56 | gsutil ls -lr $(shell awk -F= '/^results/ {print $$2}' ${GCP_CFG}) 57 | gsutil cat $(shell awk -F= '/^results/ {print $$2}' ${GCP_CFG})/batch_000-blastn-pdbnt.out.gz | gzip -cd - 58 | 59 | gcp-status: 60 | ${ELB} status --cfg ${GCP_CFG} --logfile ${GCP_LOG} --loglevel DEBUG 61 | 62 | gcp-delete: 63 | ${ELB} delete --cfg ${GCP_CFG} --logfile ${GCP_LOG} --loglevel DEBUG 64 | 65 | gcp-creds: 66 | gcloud info 67 | 68 | gcp-config: 69 | cat -n ${GCP_CFG} 70 | 71 | gcp-log: 72 | cat -n ${GCP_LOG} 73 | 74 | gcp-distclean: 75 | gsutil -m rm -r gs://elasticblast-demo-$(shell cat bucket-suffix.txt) 76 | 77 | 78 | ## AWS 79 | aws-init: bucket-suffix.txt 80 | sed -i~ -e 's/BUCKET_SUFFIX/$(shell cat $<)/' elb-*.ini 81 | aws s3 mb s3://elasticblast-demo-$(shell cat $<) 82 | 83 | aws-run: 84 | ${ELB} submit --cfg ${AWS_CFG} --logfile ${AWS_LOG} --loglevel DEBUG 85 | 86 | aws-results: 87 | aws s3 ls --recursive $(shell awk -F= '/^results/ {print $$2}' ${AWS_CFG}) 88 | aws s3 cp $(shell awk -F= '/^results/ {print $$2}' ${AWS_CFG})/batch_000-blastn-pdbnt.out.gz - | gzip -cd - 89 | 90 | aws-status: 91 | ${ELB} status --cfg ${AWS_CFG} --logfile ${AWS_LOG} --loglevel DEBUG 92 | 93 | aws-delete: 94 | ${ELB} delete --cfg ${AWS_CFG} --logfile ${AWS_LOG} --loglevel DEBUG 95 | 96 | aws-creds: 97 | aws sts get-caller-identity 98 | 99 | aws-config: 100 | cat -n ${AWS_CFG} 101 | 102 | aws-log: 103 | cat -n ${AWS_LOG} 104 | 105 | aws-distclean: 106 | aws s3 rb s3://elasticblast-demo-$(shell cat bucket-suffix.txt) --force 107 | 108 | distclean: 109 | ${RM} ${GCP_LOG} ${AWS_LOG} 110 | 111 | # Demo installation from PyPI 112 | .PHONY: pypi 113 | pypi: 114 | python3 -m venv .env 115 | source .env/bin/activate && pip install elastic-blast awscli 116 | .env/bin/elastic-blast --version 117 | -------------------------------------------------------------------------------- /docker-demo/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | args: ['build', '--build-arg', 'version=${_VERSION}', '-t', 'gcr.io/$PROJECT_ID/${_IMG}:${_TAG}', '.'] 4 | 5 | substitutions: 6 | _IMG: 7 | _TAG: 8 | _VERSION: 9 | 10 | images: 11 | - 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 12 | 13 | tags: 14 | - '${_TAG}' 15 | - '${_VERSION}' 16 | -------------------------------------------------------------------------------- /docker-demo/elb-aws-blastn-mane-pdbnt.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastn of Human RNA
sequences from Matched Annotation by NCBI and EMBL-EBI (MANE) project against pdbnt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | 6 | [cluster] 7 | machine-type = m5.8xlarge 8 | num-nodes = 1 9 | num-cpus = 4 10 | pd-size = 500G 11 | 12 | [blast] 13 | program = blastn 14 | options = -outfmt 7 15 | db = pdbnt 16 | mem-request = 500M 17 | mem-limit = 1.0G 18 | queries = s3://elasticblast-test/queries/MANE.GRCh38.v0.8.select_refseq_rna.fna 19 | batch-len = 5000000 20 | results = s3://elasticblast-demo-BUCKET_SUFFIX/elb-demo/blastn 21 | -------------------------------------------------------------------------------- /docker-demo/elb-gcp-blastn-mane-pdbnt.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastn of Human RNA sequences from Matched Annotation by NCBI and EMBL-EBI (MANE) project against pdbnt 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 4 12 | pd-size = 500G 13 | 14 | [blast] 15 | program = blastn 16 | options = -outfmt 7 17 | db = pdbnt 18 | mem-request = 500M 19 | mem-limit = 1.0G 20 | queries = gs://elastic-blast-samples/queries/MANE/MANE.GRCh38.v0.8.select_refseq_rna.fna 21 | batch-len = 5000000 22 | results = gs://elasticblast-demo-BUCKET_SUFFIX/elb-demo/blastn 23 | -------------------------------------------------------------------------------- /docker-demo/elb-gcp-blastp-coala-refseq-prot.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastp of COALA70 protein sequences against pdbaa 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 10 11 | num-cpus = 4 12 | pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | program = blastp 17 | options = -task blastp-fast -evalue 0.01 -outfmt 7 18 | db = pdbaa 19 | mem-request = 0.5G 20 | mem-limit = 1G 21 | queries = gs://elastic-blast-samples/queries/protein/COALA70.fa.gz 22 | batch-len = 100000 23 | results = gs://elasticblast-demo-BUCKET_SUFFIX/elb-demo/blastp 24 | -------------------------------------------------------------------------------- /docker-demo/elb-gcp-blastx-mane-swissprot.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastx of Human RNA sequences from Matched Annotation by NCBI and EMBL-EBI (MANE) project against swissprot 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 4 12 | pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | program = blastx 17 | options = -task blastx-fast -evalue 0.01 -outfmt 7 18 | db = swissprot 19 | mem-request = 500M 20 | mem-limit = 1.0G 21 | queries = gs://elastic-blast-samples/queries/MANE/MANE.GRCh38.v0.8.select_refseq_rna.fna 22 | batch-len = 5000000 23 | results = gs://elasticblast-demo-BUCKET_SUFFIX/elb-demo/blastx 24 | -------------------------------------------------------------------------------- /docker-demo/requirements.txt: -------------------------------------------------------------------------------- 1 | awscli 2 |
elastic-blast 3 | -------------------------------------------------------------------------------- /docker-demo/test-cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 3 | args: ['gcloud', 'version'] 4 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 5 | args: ['kubectl', 'version', '--client=true'] 6 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 7 | args: ['elastic-blast', '--version'] 8 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 9 | args: ['aws', '--version'] 10 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 11 | args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/'] 12 | 13 | substitutions: 14 | _IMG: 15 | _TAG: 16 | -------------------------------------------------------------------------------- /docker-janitor/Dockerfile-build-from-local-sources.gcp: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM google/cloud-sdk:alpine 22 | 23 | ARG version 24 | 25 | LABEL Description="NCBI ElasticBLAST Janitor Module" 26 | LABEL Version=${version} 27 | LABEL Vendor="NCBI/NLM/NIH" 28 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 29 | 30 | COPY elastic-blast-janitor.sh /usr/bin/ 31 | 32 | RUN chmod +x /usr/bin/elastic-blast-janitor.sh && \ 33 | apk -U upgrade && \ 34 | apk add --no-cache bash python3 py3-pip py3-wheel curl && \ 35 | pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ 36 | mkdir /var/elastic-blast && \ 37 | rm -rf /var/cache/apk/* 38 | 39 | RUN gcloud -q components install kubectl 40 | 41 | COPY src/ /var/elastic-blast/src/ 42 | COPY bin/ /var/elastic-blast/bin/ 43 | COPY requirements/ /var/elastic-blast/requirements/ 44 | COPY setup.py /var/elastic-blast/setup.py 45 | COPY setup.cfg_cloud /var/elastic-blast/setup.cfg 46 | 47 | WORKDIR /var/elastic-blast 48 | 49 | RUN pip3 install . -r requirements/test.txt --break-system-packages 50 | 51 | CMD ["/usr/bin/elastic-blast-janitor.sh"] 52 | -------------------------------------------------------------------------------- /docker-janitor/Dockerfile.gcp: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. 
It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM google/cloud-sdk:alpine 22 | 23 | ARG version 24 | 25 | LABEL Description="NCBI ElasticBLAST Janitor Module" 26 | LABEL Version=${version} 27 | LABEL Vendor="NCBI/NLM/NIH" 28 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 29 | 30 | COPY requirements.txt . 31 | COPY elastic-blast-janitor.sh /usr/bin/ 32 | 33 | RUN chmod +x /usr/bin/elastic-blast-janitor.sh && \ 34 | apk -U upgrade && \ 35 | apk add --no-cache bash python3 py3-pip py3-wheel curl && \ 36 | pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ 37 | pip3 install --no-cache-dir -r requirements.txt --break-system-packages && rm -rf /var/cache/apk/* requirements.txt 38 | 39 | RUN gcloud -q components install kubectl 40 | 41 | CMD ["/usr/bin/elastic-blast-janitor.sh"] 42 | -------------------------------------------------------------------------------- /docker-janitor/Makefile: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
20 | 21 | # Makefile for building a container to run the ElasticBLAST janitor 22 | # 23 | # Author: Christiam Camacho 24 | # Created: Tue Aug 31 11:29:06 EDT 2021 25 | 26 | SHELL=/bin/bash 27 | .PHONY: all pre-check check clean build publish gcp-build gcp-check gcp-clean 28 | 29 | IMG?=ncbi/elasticblast-janitor 30 | VERSION?=0.4.0 31 | ELB_VERSION?=$(shell git describe --tags --abbrev=0) 32 | GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null) 33 | GCP_TEST_BUCKET?=gs://elasticblast-test/query-split-run-test 34 | 35 | # For gcp-test 36 | ELB_GCP_PROJECT?=ncbi-sandbox-blast 37 | ELB_GCP_REGION?=us-east4 38 | ELB_GCP_ZONE?=us-east4-a 39 | ELB_RESULTS?=gs://elasticblast-test 40 | ELB_CLUSTER_NAME?=elb-test-cluster 41 | 42 | 43 | all: gcp-build gcp-check 44 | 45 | build: 46 | docker build --build-arg version=${VERSION} -f Dockerfile.gcp -t ${IMG}:${VERSION} . 47 | docker tag ${IMG}:${VERSION} ${IMG}:latest 48 | 49 | publish: build 50 | docker push ${IMG}:${VERSION} 51 | docker push ${IMG}:latest 52 | 53 | clean: 54 | -docker image rm ${IMG}:${VERSION} ${IMG}:latest 55 | 56 | check: 57 | -docker run --rm ${IMG}:${VERSION} elastic-blast --version 58 | -docker run --rm ${IMG}:${VERSION} 59 | 60 | gcp-build: 61 | gcloud builds submit --config cloudbuild.yaml --substitutions _VERSION=${VERSION},_IMG=${IMG} 62 | 63 | # Use this target to build an image from your local sources as opposed to those on PyPI.org 64 | .PHONY: gcp-build-from-local-sources 65 | gcp-build-from-local-sources: 66 | rsync -a ../setup.py ../setup.cfg_cloud ../src ../bin ../requirements ${PWD}/ 67 | sed -i~ -e '/^value = $${VERSION}/d;' setup.cfg_cloud 68 | echo "value = ${ELB_VERSION}" >> setup.cfg_cloud 69 | -gcloud builds submit --config cloudbuild.yaml --substitutions _VERSION=${VERSION},_IMG=${IMG},_DOCKERFILE='Dockerfile-build-from-local-sources.gcp' 70 | rm -fr src bin requirements setup.cfg_cloud setup.py 71 | 72 | .PHONY: gcp-test 73 | gcp-test: 74 | gcloud builds submit --config cloudrun.yaml --substitutions _IMG="gcr.io/${GCP_PROJECT}/${IMG}:${VERSION}",_ELB_GCP_PROJECT="${ELB_GCP_PROJECT}",_ELB_GCP_REGION="${ELB_GCP_REGION}",_ELB_GCP_ZONE="${ELB_GCP_ZONE}",_ELB_RESULTS="${ELB_RESULTS}",_ELB_CLUSTER_NAME="${ELB_CLUSTER_NAME}" 75 | 76 | gcp-clean: 77 | -gcloud container images delete gcr.io/${GCP_PROJECT}/${IMG}:${VERSION} 78 | 79 | gcp-check: 80 | -gcloud container images list --repository=gcr.io/${GCP_PROJECT}/${IMG} 81 | -gcloud container images list-tags gcr.io/${GCP_PROJECT}/${IMG} 82 | -gcloud container images describe gcr.io/${GCP_PROJECT}/${IMG}:latest 83 | -gcloud container images describe gcr.io/${GCP_PROJECT}/${IMG}:${VERSION} 84 | #gcloud builds submit --config test-cloudbuild.yaml --substitutions _TAG=$(VERSION),_IMG=${IMG} 85 | 86 | gcp-list-tagless-images: 87 | gcloud container images list-tags gcr.io/${GCP_PROJECT}/${IMG} \ 88 | --filter='-tags:*' --format="get(digest)" 89 | -------------------------------------------------------------------------------- /docker-janitor/README.md: -------------------------------------------------------------------------------- 1 | ElasticBLAST Janitor docker image 2 | ================================= 3 | 4 | This docker image encapsulates functionality to support automatic shutdown in 5 | ElasticBLAST for GCP.
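For a quick local check of the image, a sketch along the following lines mirrors the environment set up in `cloudrun.yaml` in this directory. It assumes the image was built locally as `ncbi/elasticblast-janitor:latest` and that your gcloud credentials can be mounted into the container; all `ELB_*` values below are placeholders.

```bash
# All ELB_* values are placeholders; the credential mount path is an assumption.
docker run --rm \
  -v "$HOME/.config/gcloud:/root/.config/gcloud:ro" \
  -e ELB_GCP_PROJECT=my-gcp-project \
  -e ELB_GCP_REGION=us-east4 \
  -e ELB_GCP_ZONE=us-east4-a \
  -e ELB_RESULTS=gs://my-elasticblast-results \
  -e ELB_CLUSTER_NAME=my-elb-cluster \
  ncbi/elasticblast-janitor:latest \
  /usr/bin/elastic-blast-janitor.sh
```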
6 | -------------------------------------------------------------------------------- /docker-janitor/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | args: ['build', '--build-arg', 'version=${_VERSION}', '-t', 'gcr.io/$PROJECT_ID/${_IMG}:${_VERSION}', '-f', '${_DOCKERFILE}', '.'] 4 | - name: 'gcr.io/cloud-builders/docker' 5 | args: ['tag', 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}', 'gcr.io/${PROJECT_ID}/${_IMG}:latest'] 6 | 7 | substitutions: 8 | _IMG: 9 | _VERSION: 10 | _DOCKERFILE: 'Dockerfile.gcp' 11 | 12 | images: 13 | - 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}' 14 | - 'gcr.io/${PROJECT_ID}/${_IMG}:latest' 15 | 16 | tags: 17 | - '${_VERSION}' 18 | -------------------------------------------------------------------------------- /docker-janitor/cloudrun.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: '${_IMG}' 3 | args: ['elastic-blast', '--version'] 4 | - name: '${_IMG}' 5 | env: 6 | - 'ELB_GCP_PROJECT=${_ELB_GCP_PROJECT}' 7 | - 'ELB_GCP_REGION=${_ELB_GCP_REGION}' 8 | - 'ELB_GCP_ZONE=${_ELB_GCP_ZONE}' 9 | - 'ELB_RESULTS=${_ELB_RESULTS}' 10 | - 'ELB_CLUSTER_NAME=${_ELB_CLUSTER_NAME}' 11 | args: ['/bin/bash', '-x', '/usr/bin/elastic-blast-janitor.sh'] 12 | 13 | substitutions: 14 | _ELB_GCP_PROJECT: 'ncbi-sandbox-blast' 15 | _ELB_GCP_REGION: 'us-east4' 16 | _ELB_GCP_ZONE: 'us-east4-a' 17 | _ELB_RESULTS: 'gs://elasticblast-test' 18 | _ELB_CLUSTER_NAME: 'elb-test-cluster' 19 | -------------------------------------------------------------------------------- /docker-janitor/requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-blast 2 | -------------------------------------------------------------------------------- /docker-job-submit/.gitignore: -------------------------------------------------------------------------------- 1 | templates/ 2 | src/ 3 | bin/ 4 | requirements/ 5 | setup.cfg_cloud 6 | setup.py 7 | -------------------------------------------------------------------------------- /docker-job-submit/Dockerfile-build-from-local-sources.aws: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
20 | 21 | FROM alpine:3.14 22 | ARG version 23 | 24 | LABEL Description="NCBI ElasticBLAST Cloud Job Submission Module" 25 | LABEL Version=${version} 26 | LABEL Vendor="NCBI/NLM/NIH" 27 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 28 | 29 | COPY submit_jobs.py /usr/bin/ 30 | 31 | RUN chmod +x /usr/bin/submit_jobs.py && \ 32 | apk -U upgrade && \ 33 | apk add --no-cache bash python3 py3-pip py3-wheel curl unzip && \ 34 | pip3 install --no-cache-dir --upgrade pip && \ 35 | mkdir -p /var/elastic-blast && \ 36 | rm -rf /var/cache/apk/* 37 | 38 | COPY src/ /var/elastic-blast/src/ 39 | COPY bin/ /var/elastic-blast/bin/ 40 | COPY requirements/ /var/elastic-blast/requirements/ 41 | COPY setup.py /var/elastic-blast/setup.py 42 | COPY setup.cfg_cloud /var/elastic-blast/setup.cfg 43 | 44 | WORKDIR /var/elastic-blast 45 | 46 | RUN python3 -m venv /var/elastic-blast/.venv && \ 47 | source /var/elastic-blast/.venv/bin/activate && \ 48 | pip3 install . -r requirements/base.txt --no-cache-dir 49 | 50 | CMD ["/usr/bin/submit_jobs.py", "--help"] 51 | -------------------------------------------------------------------------------- /docker-job-submit/Dockerfile.aws: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act.
It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM google/cloud-sdk:alpine 22 | 23 | ARG version 24 | 25 | LABEL Description="NCBI ElasticBLAST Cloud Job Submission Module" 26 | LABEL Version=${version} 27 | LABEL Vendor="NCBI/NLM/NIH" 28 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 29 | 30 | COPY cloud-job-submit.sh /usr/bin/ 31 | COPY templates/volume-snapshot-class.yaml /templates/ 32 | COPY templates/volume-snapshot.yaml /templates/ 33 | COPY templates/pvc-rom.yaml.template /templates/ 34 | 35 | RUN chmod +x /usr/bin/cloud-job-submit.sh && \ 36 | apk -U upgrade && \ 37 | apk add --no-cache bash gettext curl jq && \ 38 | rm -rf /var/cache/apk/* 39 | 40 | RUN gcloud -q components install kubectl 41 | 42 | CMD ["/usr/bin/cloud-job-submit.sh"] 43 | -------------------------------------------------------------------------------- /docker-job-submit/awscloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'docker' 3 | args: [ 'build', '-t', '${_IMG}:$TAG_NAME', '-t', '${_IMG}:latest', '-f', '${_DOCKERFILE}', '.' 
] 4 | - name: 'docker' 5 | args: [ 'login', '-u', 'AWS', '-p', '${_AWS_ECR_PASSWD}', '${_SERVER}' ] 6 | - name: 'docker' 7 | args: [ 'push', '${_IMG}:$TAG_NAME' ] 8 | - name: 'docker' 9 | args: [ 'push', '${_IMG}:latest' ] 10 | 11 | substitutions: 12 | _DOCKERFILE: 'Dockerfile' 13 | -------------------------------------------------------------------------------- /docker-job-submit/awscloudrun.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: '${_IMG}' 3 | args: ['elastic-blast', '--version'] 4 | - name: '${_IMG}' 5 | env: 6 | - 'ELB_RESULTS=${_ELB_RESULTS}' 7 | - 'ELB_CLUSTER_NAME=${_ELB_CLUSTER_NAME}' 8 | args: ['/usr/bin/submit_jobs.py', '--help'] 9 | 10 | substitutions: 11 | _ELB_RESULTS: 's3://elasticblast-test' 12 | _ELB_CLUSTER_NAME: 'elb-test-cluster' 13 | -------------------------------------------------------------------------------- /docker-job-submit/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | args: ['build', '--build-arg', 'version=${_VERSION}', '-t', 'gcr.io/$PROJECT_ID/${_IMG}:${_VERSION}', '-f', '${_DOCKERFILE}', '.'] 4 | - name: 'gcr.io/cloud-builders/docker' 5 | args: ['tag', 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}', 'gcr.io/${PROJECT_ID}/${_IMG}:latest'] 6 | 7 | substitutions: 8 | _IMG: 9 | _VERSION: 10 | _DOCKERFILE: 'Dockerfile.gcp' 11 | 12 | images: 13 | - 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}' 14 | - 'gcr.io/${PROJECT_ID}/${_IMG}:latest' 15 | 16 | tags: 17 | - '${_VERSION}' 18 | -------------------------------------------------------------------------------- /docker-job-submit/cloudrun.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: '${_IMG}' 3 | args: ['kubectl', 'version', '--client=true'] 4 | - name: '${_IMG}' 5 | args: ['envsubst', '--version'] 6 | - name: '${_IMG}' 7 | env: 8 | - 'ELB_GCP_PROJECT=${_ELB_GCP_PROJECT}' 9 | - 'ELB_GCP_ZONE=${_ELB_GCP_ZONE}' 10 | - 'ELB_RESULTS=${_ELB_RESULTS}' 11 | - 'ELB_CLUSTER_NAME=${_ELB_CLUSTER_NAME}' 12 | args: ['/bin/bash', '-x', '/usr/bin/cloud-job-submit.sh'] 13 | 14 | substitutions: 15 | _ELB_GCP_PROJECT: 'ncbi-sandbox-blast' 16 | _ELB_GCP_ZONE: 'us-east4-a' 17 | _ELB_RESULTS: 'gs://elasticblast-test' 18 | _ELB_CLUSTER_NAME: 'elb-test-cluster' 19 | -------------------------------------------------------------------------------- /docker-job-submit/requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-blast 2 | -------------------------------------------------------------------------------- /docker-job-submit/submit_jobs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # PUBLIC DOMAIN NOTICE 3 | # National Center for Biotechnology Information 4 | # 5 | # This software is a "United States Government Work" under the 6 | # terms of the United States Copyright Act. It was written as part of 7 | # the authors' official duties as United States Government employees and 8 | # thus cannot be copyrighted. This software is freely available 9 | # to the public for use. The National Library of Medicine and the U.S. 10 | # Government have not placed any restriction on its use or reproduction. 11 | # 12 | # Although all reasonable efforts have been taken to ensure the accuracy 13 | # and reliability of the software and data, the NLM and the U.S. 
14 | # Government do not and cannot warrant the performance or results that 15 | # may be obtained by using this software or data. The NLM and the U.S. 16 | # Government disclaim all warranties, express or implied, including 17 | # warranties of performance, merchantability or fitness for any particular 18 | # purpose. 19 | # 20 | # Please cite NCBI in any work or product based on this material. 21 | 22 | # Script to submit ElasticBLAST jobs by an AWS Batch job 23 | # 24 | # Author: Greg Boratyn boratyng@ncbi.nlm.nih.gov 25 | 26 | 27 | import argparse 28 | import logging 29 | import os 30 | from elastic_blast.base import QuerySplittingResults 31 | from elastic_blast.filehelper import harvest_query_splitting_results, open_for_read 32 | from elastic_blast.constants import ElbCommand, ELB_DFLT_LOGLEVEL 33 | from elastic_blast.constants import ELB_METADATA_DIR, ELB_META_CONFIG_FILE 34 | from elastic_blast.aws import ElasticBlastAws, handle_aws_error 35 | from elastic_blast.elb_config import ElasticBlastConfig 36 | from elastic_blast.util import config_logging 37 | from elastic_blast.base import MemoryStr 38 | from elastic_blast import VERSION 39 | 40 | DESC = f'Helper script to submit ElasticBLAST (version {VERSION}) jobs remotely' 41 | 42 | @handle_aws_error 43 | def main(): 44 | """Main function""" 45 | parser = create_arg_parser() 46 | args = parser.parse_args() 47 | 48 | config_logging(args) 49 | logging.info(f"ElasticBLAST submit_jobs.py {VERSION}") 50 | 51 | cfg_uri = os.path.join(args.results, ELB_METADATA_DIR, ELB_META_CONFIG_FILE) 52 | logging.debug(f"Loading {cfg_uri}") 53 | with open_for_read(cfg_uri) as f: 54 | cfg_json = f.read() 55 | cfg = ElasticBlastConfig.from_json(cfg_json) 56 | logging.debug(f'AWS region: {cfg.aws.region}') 57 | cfg.validate(ElbCommand.SUBMIT) 58 | eb = ElasticBlastAws(cfg, False) 59 | 60 | bucket = cfg.cluster.results 61 | logging.info(f'Bucket: {bucket}') 62 | qr = harvest_query_splitting_results(bucket) 63 | logging.debug(f'Submitting jobs for query batches: {" ".join(qr.query_batches)}') 64 | eb.client_submit(qr.query_batches, False) 65 | 66 | 67 | def create_arg_parser(): 68 | """ Create the command line options parser object for this script. """ 69 | parser = argparse.ArgumentParser(description=DESC) 70 | parser.add_argument('--results', metavar='STR', type=str, help='Results bucket', required=True) 71 | parser.add_argument("--logfile", default='stderr', type=str, 72 | help=f"Default: stderr") 73 | parser.add_argument("--loglevel", default='DEBUG', 74 | help=f"Default: DEBUG", 75 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]) 76 | parser.add_argument('--version', action='version', version='%(prog)s ' + VERSION) 77 | 78 | 79 | return parser 80 | 81 | if __name__ == '__main__': 82 | main() 83 | 84 | 85 | # vim: set syntax=python ts=4 et : 86 | -------------------------------------------------------------------------------- /docker-qs/Dockerfile: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 
10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | #FROM alpine:3.14 22 | FROM google/cloud-sdk:alpine 23 | ARG version 24 | 25 | COPY requirements.txt . 26 | COPY run.sh /usr/bin/ 27 | 28 | RUN chmod +x /usr/bin/run.sh && \ 29 | apk -U upgrade && \ 30 | apk add --no-cache bash python3 py3-pip py3-wheel curl unzip && \ 31 | pip3 install --no-cache-dir --upgrade pip --break-system-packages && \ 32 | pip3 install --no-cache-dir -r requirements.txt --break-system-packages && rm -rf /var/cache/apk/* requirements.txt 33 | 34 | LABEL Description="NCBI ElasticBLAST Query Splitting Module" 35 | LABEL Version=${version} 36 | LABEL Vendor="NCBI/NLM/NIH" 37 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 38 | 39 | 40 | CMD ["run.sh", "-h"] 41 | -------------------------------------------------------------------------------- /docker-qs/Dockerfile-build-from-local-sources: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | FROM alpine:3.14 22 | ARG version 23 | 24 | LABEL Description="NCBI ElasticBLAST Query Splitting Module" 25 | LABEL Version=${version} 26 | LABEL Vendor="NCBI/NLM/NIH" 27 | LABEL Maintainer=camacho@ncbi.nlm.nih.gov 28 | 29 | COPY requirements.txt .
30 | RUN sed -i '/elastic-blast/d' requirements.txt 31 | 32 | COPY run.sh /usr/bin/ 33 | 34 | RUN chmod +x /usr/bin/run.sh && \ 35 | apk -U upgrade && \ 36 | apk add --no-cache bash python3 py3-pip py3-wheel && \ 37 | pip3 install --no-cache-dir --upgrade pip --break-system-packages && \ 38 | pip3 install --no-cache-dir -r requirements.txt --break-system-packages && \ 39 | mkdir -p /var/elastic-blast && \ 40 | rm -rf /var/cache/apk/* requirements.txt 41 | 42 | COPY src/ /var/elastic-blast/src/ 43 | COPY bin/ /var/elastic-blast/bin/ 44 | COPY requirements/ /var/elastic-blast/requirements/ 45 | COPY setup.py /var/elastic-blast/setup.py 46 | COPY setup.cfg_cloud /var/elastic-blast/setup.cfg 47 | 48 | WORKDIR /var/elastic-blast 49 | 50 | RUN pip3 install . -r requirements/base.txt 51 | 52 | CMD ["run.sh", "-h"] 53 | -------------------------------------------------------------------------------- /docker-qs/README.md: -------------------------------------------------------------------------------- 1 | Query splitting docker image 2 | ============================ 3 | 4 | This docker image encapsulates the functionality to perform query splitting 5 | for ElasticBLAST on the cloud (as opposed to the local client invoking 6 | ElasticBLAST). 7 | 8 | The `Makefile` contains targets to build, test and deploy the docker image in 9 | various repositories. 10 | 11 | If you have `docker` available, run `make build` to build the image, and `make 12 | check` to test it locally. 13 | 14 | -------------------------------------------------------------------------------- /docker-qs/awscloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'docker' 3 | args: [ 'build', '-t', '${_IMG}:$TAG_NAME', '-t', '${_IMG}:latest', '-f', '${_DOCKERFILE}', '.' 
] 4 | - name: 'docker' 5 | args: [ 'login', '-u', 'AWS', '-p', '${_AWS_ECR_PASSWD}', '${_SERVER}' ] 6 | - name: 'docker' 7 | args: [ 'push', '${_IMG}:$TAG_NAME' ] 8 | - name: 'docker' 9 | args: [ 'push', '${_IMG}:latest' ] -------------------------------------------------------------------------------- /docker-qs/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | args: ['build', '--build-arg', 'version=${_VERSION}', '-t', 'gcr.io/$PROJECT_ID/${_IMG}:${_VERSION}', '.'] 4 | - name: 'gcr.io/cloud-builders/docker' 5 | args: ['tag', 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}', 'gcr.io/${PROJECT_ID}/${_IMG}:latest'] 6 | 7 | substitutions: 8 | _IMG: 9 | _VERSION: 10 | 11 | images: 12 | - 'gcr.io/${PROJECT_ID}/${_IMG}:${_VERSION}' 13 | - 'gcr.io/${PROJECT_ID}/${_IMG}:latest' 14 | 15 | tags: 16 | - '${_VERSION}' 17 | -------------------------------------------------------------------------------- /docker-qs/cloudrun.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: '${_IMG}' 3 | args: ['sh', '-c', '${_CMD}'] 4 | -------------------------------------------------------------------------------- /docker-qs/requirements.txt: -------------------------------------------------------------------------------- 1 | awscli 2 | elastic-blast 3 | -------------------------------------------------------------------------------- /docker-qs/test-cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 3 | args: ['aws', '--version'] 4 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 5 | args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/'] 6 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 7 | args: ['fasta_split.py', '--help'] 8 | - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 9 | args: ['run.sh', '-h'] 10 | # This doesn't work due to lack of credentials 11 | #- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' 12 | # args: ['run.sh', '-i', 's3://elasticblast-test/queries/hepatitis.fsa.gz', '-o', 's3://elasticblast-test/'] 13 | 14 | substitutions: 15 | _IMG: 16 | _TAG: 17 | -------------------------------------------------------------------------------- /docs/README-fasta-split.md: -------------------------------------------------------------------------------- 1 | # fasta_split 2 | 3 | Splits a large FASTA file into several files of approximately the same size. 4 | 5 | It never splits a file mid-sequence, so all sequences are preserved intact in the final file set. 6 | 7 | The input file can reside on the local file system or on GCP's GS file system, or be accessible 8 | via an HTTP(S) or FTP URL. 9 | 10 | The file can be gzipped, or a set of files can be archived with tar and optionally compressed 11 | with gzip or bzip2. A set of files in a tar archive is treated as if it were merged into one large file. 12 | 13 | The destination for all of the generated files (batches, jobs, and manifest) can be either the 14 | local or the GS file system. 15 | 16 | It generates job description files from a template by substituting variables in the text 17 | of the template. 18 | 19 | All jobs are listed in a manifest file, which by default is written to STDOUT.
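For example, splitting a gzipped query file into 5,000,000-base batches might look like the sketch below (the script ships as `bin/fasta_split.py`; the bucket names, template file and manifest path are illustrative placeholders):

```bash
# Write batch FASTA files, per-batch job YAMLs and the manifest to illustrative paths.
./fasta_split.py queries.fa.gz \
    --batch_len 5000000 \
    --output gs://my-bucket/batches \
    --results gs://my-bucket/results \
    --job_path jobs \
    --template job.yaml.template \
    --manifest manifest.txt
```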
20 | 21 | $ ./fasta_split -h 22 | usage: fasta_split [-h] [-l BATCH_LEN] [-o OUTPUT] [-r RESULTS] [-j JOB_PATH] 23 | [-t TEMPLATE] [-m MANIFEST] 24 | input 25 | 26 | Split FASTA file 27 | 28 | positional arguments: 29 | input input FASTA file, possibly gzipped 30 | 31 | optional arguments: 32 | -h, --help show this help message and exit 33 | -l BATCH_LEN, --batch_len BATCH_LEN 34 | batch length 35 | -o OUTPUT, --output OUTPUT 36 | output path for batch FASTA files 37 | -r RESULTS, --results RESULTS 38 | output path for BLAST results 39 | -j JOB_PATH, --job_path JOB_PATH 40 | output path for job YAML files 41 | -t TEMPLATE, --template TEMPLATE 42 | YAML template 43 | -m MANIFEST, --manifest MANIFEST 44 | manifest file to write 45 | 46 | The script substitutes the following variables in the template YAML file while generating the job YAML files. 47 | Assuming the script writes a specific batch file as gs://path_to_input/batch_000.fa and 48 | the results parameter is 'path_to_results': 49 | 50 | {QUERY} - batch_000 51 | {QUERY_FQN} - gs://path_to_input/batch_000.fa 52 | {QUERY_PATH} - gs://path_to_input 53 | {QUERY_NUM} - 000 54 | {RESULTS} - path_to_results 55 | -------------------------------------------------------------------------------- /lambda-janitor/.gitignore: -------------------------------------------------------------------------------- 1 | response[12].json 2 | .env-testing 3 | -------------------------------------------------------------------------------- /lambda-janitor/README.md: -------------------------------------------------------------------------------- 1 | ElasticBLAST Janitor lambda function 2 | ==================================== 3 | 4 | The code in this directory encapsulates functionality to support automatic shutdown in 5 | ElasticBLAST for AWS. 6 | 7 | Overview 8 | -------- 9 | 10 | The ElasticBLAST janitor in AWS is implemented as a CloudFormation stack which 11 | is nested inside the main ElasticBLAST CloudFormation stack. 12 | 13 | The ElasticBLAST janitor CloudFormation stack must be deployed to a publicly 14 | accessible S3 bucket. It refers to a Zip archive containing the code to run 15 | the ElasticBLAST janitor. This Zip archive must also be deployed to a publicly 16 | accessible S3 bucket. 17 | 18 | The ElasticBLAST janitor CloudFormation stack contains two sets of resources: 19 | 20 | 1. CopyZips resources: these copy the Zip archive containing the ElasticBLAST 21 | janitor code from its public location to a temporary bucket created for the 22 | ElasticBLAST invocation. 23 | 2. ElasticBLAST janitor: lambda function, execution role, permission and event rule to 24 | enable, schedule and run the ElasticBLAST janitor functionality. 25 | 26 | Implementation 27 | -------------- 28 | 29 | The lambda function code resides in `lambda_elb.py`, though the core code is 30 | in the `elastic_blast` Python module. This lambda function and its 31 | dependencies are deployed to S3 as a Zip archive (see the 32 | `elasticblast-janitor-lambda-deployment.zip` Makefile target). 33 | 34 | N.B.: The `create-admin-role STACK_NAME=elasticblast` make target creates a CloudFormation 35 | stack with the necessary role for the nested CloudFormation stack to execute. 36 | 37 | Maintainer instructions 38 | ----------------------- 39 | 40 | * Creating the admin role needed to run the janitor: `make create-lambda-role` 41 | * Testing the janitor function on the local host: `make -C .. aws-janitor-smoke-test` 42 | * Testing lambda function code in isolation: `make test-lambda`.
Be sure to 43 | refresh or set the `VENV and ELB_RESULTS Makefile` variables accordingly. 44 | * Deploying lambda standalone function: `make deploy` 45 | * Remove lambda standalone function: `make undeploy` 46 | * Test lambda function deployed via CLI: `make invoke` 47 | * Deploy cloudformation stack for janitor: `make upload-template` 48 | * Deploy to production: `make deploy-to-production` 49 | 50 | -------------------------------------------------------------------------------- /lambda-janitor/janitor-test-stack.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: ElasticBLAST Janitor Test stack which copies lambda function into a temporary bucket and runs it on a schedule 3 | Parameters: 4 | Owner: 5 | Description: User who creates/runs this application 6 | Type: String 7 | 8 | JanitorSchedule: 9 | Description: Cron expression to determine when to run the ElasticBLAST Janitor module. 10 | Type: String 11 | 12 | JanitorTemplateUrl: 13 | Description: URL for the tested Janitor template. 14 | Type: String 15 | 16 | JanitorLambdaDeploymentS3Bucket: 17 | Description: Bucket where the janitor lambda function deployment resides. 18 | Type: String 19 | 20 | JanitorLambdaDeploymentS3Key: 21 | Description: Key within the bucket where the janitor lambda function deployment resides. 22 | Type: String 23 | 24 | ElbResults: 25 | Description: ElasticBLAST results bucket 26 | Type: String 27 | 28 | 29 | Resources: 30 | JanitorStack: 31 | Type: "AWS::CloudFormation::Stack" 32 | Properties: 33 | TemplateURL: !Ref JanitorTemplateUrl 34 | Parameters: 35 | StackName: !Ref 'AWS::StackName' 36 | JanitorSchedule: !Ref JanitorSchedule 37 | Owner: !Ref Owner 38 | S3Bucket: !Ref JanitorLambdaDeploymentS3Bucket 39 | S3Key: !Ref JanitorLambdaDeploymentS3Key 40 | ElbResults: !Ref ElbResults 41 | Tags: 42 | - Key: Name 43 | Value: !Join [-, [elasticblast, !Ref Owner, 'janitor']] 44 | - Key: Project 45 | Value: BLAST 46 | - Key: Owner 47 | Value: !Ref Owner 48 | - Key: billingcode 49 | Value: elastic-blast 50 | 51 | -------------------------------------------------------------------------------- /lambda-janitor/requirements-for-testing.txt: -------------------------------------------------------------------------------- 1 | elastic-blast==0.0.80 2 | --extra-index-url https://test.pypi.org/simple/ 3 | -------------------------------------------------------------------------------- /lambda-janitor/requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-blast 2 | -------------------------------------------------------------------------------- /lambda-janitor/trust-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "Service": "lambda.amazonaws.com" 8 | }, 9 | "Action": "sts:AssumeRole" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /pex-cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # Prepare config file with the version 3 | - name: 'alpine' 4 | env: 5 | - 'VERSION=${_VERSION}' 6 | args: ['sh', '-c', 'apk add gettext; envsubst setup.cfg'] 7 | 8 | # Build for a single Python version 9 | - name: 'python:${_PY_VERSION}-alpine3.14' 10 | args: ['sh', '-c', 'python -mvenv .env'] 11 | - name: 
'python:${_PY_VERSION}-alpine3.14' 12 | args: ['sh', '-c', '.env/bin/pip3 install -q -r requirements/base.txt'] 13 | - name: 'python:${_PY_VERSION}-alpine3.14' 14 | entrypoint: sh 15 | args: 16 | - -c 17 | - if [ ${_PY_VERSION} == "3.7" ]; then 18 | .env/bin/pex --disable-cache . -r requirements/base.txt -c elastic-blast -o elastic-blast; 19 | md5sum elastic-blast > elastic-blast.md5; 20 | fi; 21 | - name: 'python:${_PY_VERSION}-alpine3.14' 22 | args: ['sh', '-c', '.env/bin/pex --disable-cache . -r requirements/base.txt -c elastic-blast -o elastic-blast${_PY_VERSION}'] 23 | - name: 'python:${_PY_VERSION}-alpine3.14' 24 | args: ['sh', '-c', 'md5sum elastic-blast${_PY_VERSION} > elastic-blast${_PY_VERSION}.md5'] 25 | 26 | substitutions: 27 | _TARGET: 'gs://elasticblast-test/builds' 28 | 29 | artifacts: 30 | objects: 31 | location: '${_TARGET}/${_VERSION}' 32 | paths: 33 | - 'elastic-blast*' 34 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | wheel==0.41.2 2 | setuptools==78.1.1 3 | importlib-resources==6.1.1 4 | importlib-metadata==7.0.0 5 | pex==2.33.4 6 | boto3==1.37.12 7 | botocore==1.37.12 8 | awslimitchecker==12.0.0 9 | tenacity==9.0.0 10 | dataclasses-json==0.6.7 11 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | 3 | pytest==8.3.5 4 | pytest-cov==6.0.0 5 | pytest-mock==3.14.0 6 | teamcity-messages==1.33 7 | mypy==1.15.0 8 | pylint==3.3.5 9 | tox==4.4.12 10 | yamllint==1.36.0 11 | moto==4.2.14 12 | docker==7.1.0 13 | cfn-lint==1.30.0 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = elastic_blast 3 | description = ElasticBLAST runs BLAST searches faster by distributing its work to multiple cloud instances. This allows larger numbers of queries to be searched in less time compared to BLAST+ on a single host. Use of the cloud facilitates collaboration, sharing of results, datasets and pipelines on a common platform. The National Center for Biotechnology Information ([NCBI](https://www.ncbi.nlm.nih.gov)), part of the National Library of Medicine at the NIH, developed and maintains ElasticBLAST. 
4 | long_description = file:README.md 5 | long_description_content_type = text/markdown 6 | maintainer = NCBI 7 | maintainer_email = elastic-blast-support@ncbi.nlm.nih.gov 8 | python_requires = >=3.7 9 | license_files = file:LICENSE 10 | classifiers = 11 | Programming Language :: Python :: 3 12 | Development Status :: 5 - Production/Stable 13 | Environment :: Console 14 | Operating System :: MacOS 15 | Operating System :: Unix 16 | Operating System :: POSIX :: Linux 17 | Topic :: Scientific/Engineering :: Bio-Informatics 18 | Intended Audience :: Science/Research 19 | Intended Audience :: Information Technology 20 | Intended Audience :: System Administrators 21 | Intended Audience :: Developers 22 | keywords = "BLAST", "Alignment", "Cloud Computing", "Bioinformatics" 23 | url = https://blast.ncbi.nlm.nih.gov/doc/elastic-blast 24 | project_urls = 25 | Repository = https://github.com/ncbi/elastic-blast/ 26 | ChangeLog = https://github.com/ncbi/elastic-blast/releases 27 | Support = https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/support.html 28 | Tutorials = https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/tutorials.html 29 | 30 | [files] 31 | packages_root = src 32 | 33 | scripts = 34 | bin/* 35 | 36 | data_files = 37 | share = share/** 38 | Makefile = Makefile 39 | 40 | [auto-version] 41 | type = git-pep440 42 | -------------------------------------------------------------------------------- /setup.cfg_cloud: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = elastic_blast 3 | 4 | [files] 5 | packages_root = src 6 | 7 | scripts = 8 | bin/* 9 | 10 | data_files = 11 | share = share/** 12 | Makefile = Makefile 13 | 14 | [auto-version] 15 | type = fixed 16 | value = ${VERSION} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup(setup_requires='packit', packit=True) -------------------------------------------------------------------------------- /share/etc/elb-aws-blastn-nt-8-nodes.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | aws-vpc = vpc-0135a4f42be3b5e31 6 | aws-subnet = subnet-0f7e7fd56c7568838,subnet-0053b5ebbac7a5360,subnet-0b29e82f35656c073,subnet-0be1594579d69e7dd,subnet-0fd3737e5ecd4aab2,subnet-047a23ed06e74ce67 7 | 8 | [cluster] 9 | num-nodes = 8 10 | num-cpus = 16 11 | 12 | [blast] 13 | program = blastn 14 | db = nt 15 | queries = s3://elasticblast-test/queries/hepatitis.fsa.gz 16 | -------------------------------------------------------------------------------- /share/etc/elb-blastp-nr.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastp against nr 2 | 3 | [cloud-provider] 4 | gcp-project = 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | num-nodes = 2 10 | num-cpus = 30 11 | pd-size = 1000G 12 | #use-preemptible = 1 13 | 14 | [blast] 15 | program = blastp 16 | db = nr 17 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 18 | -------------------------------------------------------------------------------- /share/etc/yamllint-config.yaml: -------------------------------------------------------------------------------- 1 | extends: default 2 | 3 | 
rules: 4 | # 80 chars should be enough, but don't fail if a line is longer 5 | line-length: 6 | max: 80 7 | level: warning 8 | 9 | # don't bother me with this rule 10 | indentation: disable 11 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info -------------------------------------------------------------------------------- /src/elastic_blast/__init__.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | try: 22 | from importlib import metadata 23 | except ImportError: 24 | # Running on pre-3.8 Python; use importlib-metadata package 25 | import importlib_metadata as metadata # type: ignore 26 | 27 | VERSION = metadata.version(__package__) # type: ignore 28 | -------------------------------------------------------------------------------- /src/elastic_blast/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/src/elastic_blast/commands/__init__.py -------------------------------------------------------------------------------- /src/elastic_blast/commands/delete.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
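# Illustrative wiring (assumed, based on bin/elastic-blast in this repository): the
# delete() entry point below is what runs for a command line such as
#   elastic-blast delete --cfg my-search.ini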
20 | 21 | """ 22 | elb/comands/delete.py - delete cluster 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | from typing import Any, List 28 | 29 | from elastic_blast.elasticblast_factory import ElasticBlastFactory 30 | from elastic_blast.constants import ElbCommand 31 | from elastic_blast.elb_config import ElasticBlastConfig 32 | 33 | # TODO: use cfg only when args.wait, args.sync, and args.run_label are replicated in cfg 34 | def delete(args, cfg: ElasticBlastConfig, clean_up_stack: List[Any]) -> int: 35 | """ Entry point to delete resources associated with an ElasticBLAST search """ 36 | cfg.validate(ElbCommand.DELETE) 37 | elastic_blast = ElasticBlastFactory(cfg, False, clean_up_stack) 38 | elastic_blast.delete() 39 | return 0 40 | -------------------------------------------------------------------------------- /src/elastic_blast/elasticblast_factory.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | elastic_blast/elasticblast_factory.py - Factory for ElasticBlastXXX 23 | 24 | Author: Victor Joukov (joukovv@ncbi.nlm.nih.gov) 25 | Created: Mon 13 Sep 2021 05:17:00 PM EDT 26 | """ 27 | 28 | from elastic_blast.aws import ElasticBlastAws 29 | from elastic_blast.constants import CSP 30 | from elastic_blast.elasticblast import ElasticBlast 31 | from elastic_blast.elb_config import ElasticBlastConfig 32 | from elastic_blast.gcp import ElasticBlastGcp 33 | 34 | 35 | def ElasticBlastFactory(cfg: ElasticBlastConfig, create: bool, cleanup_stack): 36 | if cfg.cloud_provider.cloud == CSP.AWS: 37 | elastic_blast: ElasticBlast = ElasticBlastAws(cfg, create, cleanup_stack) 38 | elif cfg.cloud_provider.cloud == CSP.GCP: 39 | elastic_blast = ElasticBlastGcp(cfg, create, cleanup_stack) 40 | else: 41 | raise NotImplementedError(f'Provider {cfg.cloud_provider.cloud} is not supported yet') 42 | return elastic_blast 43 | -------------------------------------------------------------------------------- /src/elastic_blast/gcp_traits.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. 
This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | elb/gcp_traits.py - helper module for GCP machine info 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | import re, logging 28 | from .base import InstanceProperties 29 | from .util import safe_exec 30 | from .constants import GCP_APIS 31 | 32 | GCP_MACHINES = { 33 | "n1-standard" : 3.75, 34 | "n1-highmem" : 6.5, 35 | "n1-highcpu" : 0.9, 36 | "n2-standard" : 4, 37 | "n2-highmem" : 8, 38 | "n2-highcpu" : 1, 39 | "n2d-standard" : 4, 40 | "n2d-highmem" : 8, 41 | "n2d-highcpu" : 1, 42 | "e2-standard" : 4, 43 | "e2-highmem" : 8, 44 | "e2-highcpu" : 1, 45 | "m1-ultramem" : 24.025, 46 | "m1-megamem" : 14.93333, 47 | "m2-ultramem" : 28.307692307692308, 48 | "c2-standard" : 4, 49 | } 50 | re_gcp_machine_type = re.compile(r'([^-]+-[^-]+)-([0-9]+)') 51 | def get_machine_properties(machineType: str) -> InstanceProperties: 52 | """ given the CGP machine type returns tuple of number of CPUs and abount of RAM in GB """ 53 | ncpu = 0 54 | nram = 0.0 55 | mo = re_gcp_machine_type.match(machineType) 56 | if mo: 57 | series, sncpu = mo.groups() 58 | ncpu = int(sncpu) 59 | nram = ncpu * GCP_MACHINES[series] 60 | else: 61 | # Should not return 0 CPUs or RAM 62 | err = f'Cannot get properties for {machineType}' 63 | raise NotImplementedError(err) 64 | return InstanceProperties(ncpu, nram) 65 | 66 | 67 | def enable_gcp_api(project: str, dry_run: bool): 68 | """ Enable GCP APIs if they are not already enabled 69 | parameters: 70 | project: GCP project 71 | dry_run: True for dry run 72 | raises: 73 | SafeExecError if there is an error checking or trying to enable APIs 74 | """ 75 | for api in GCP_APIS: 76 | cmd = 'gcloud services list --enabled --format=value(config.name) ' 77 | cmd += f'--filter=config.name={api}.googleapis.com ' 78 | cmd += f'--project {project}' 79 | if dry_run: 80 | logging.info(cmd) 81 | else: 82 | p = safe_exec(cmd) 83 | if not p.stdout: 84 | cmd = f'gcloud services enable {api}.googleapis.com ' 85 | cmd += f'--project {project}' 86 | p = safe_exec(cmd) 87 | -------------------------------------------------------------------------------- /src/elastic_blast/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/src/elastic_blast/resources/__init__.py -------------------------------------------------------------------------------- /src/elastic_blast/resources/quotas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/src/elastic_blast/resources/quotas/__init__.py -------------------------------------------------------------------------------- 
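A minimal usage sketch for get_machine_properties above (illustrative machine types; the
RAM figure comes from the GCP_MACHINES GB-per-CPU table):

    from elastic_blast.gcp_traits import get_machine_properties

    props = get_machine_properties('n2-standard-8')
    # -> InstanceProperties(8, 32.0): 8 vCPUs, 8 * 4 GB per CPU for the n2-standard series
    get_machine_properties('e2-micro')
    # -> raises NotImplementedError: shared-core names do not match the series-N pattern
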
/src/elastic_blast/resources/quotas/quota_check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | src/elb/resources/quotas/quota-check.py - entry point to functionality to check 4 | whether enough resources are available to run ElasticBLAST 5 | 6 | Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 7 | Created: Mon 14 Sep 2020 09:58:36 AM EDT 8 | """ 9 | import configparser 10 | import elastic_blast.config 11 | from elastic_blast.resources.quotas.quota_aws_ec2_cf import ResourceCheckAwsEc2CloudFormation 12 | from elastic_blast.resources.quotas.quota_aws_batch import ResourceCheckAwsBatch 13 | from elastic_blast.aws import create_aws_config 14 | from elastic_blast.elb_config import ElasticBlastConfig 15 | from typing import Union 16 | 17 | def check_resource_quotas(cfg: ElasticBlastConfig) -> None: 18 | """ 19 | Check the resources needed in a Cloud Service Provider to ensure 20 | ElasticBLAST can operate. 21 | 22 | Pre-condition: cfg is a validated ElasticBLAST configuration object 23 | Post-condition: if at the time this function is invoked the resources 24 | requested can be met, the function will return, otherwise an exception will 25 | be raised. 26 | """ 27 | if cfg.cluster.dry_run: 28 | return 29 | if cfg.cloud_provider.cloud == elastic_blast.config.CSP.AWS: 30 | boto_cfg = create_aws_config(cfg.aws.region) 31 | ResourceCheckAwsEc2CloudFormation(cfg, boto_cfg)() 32 | ResourceCheckAwsBatch(boto_cfg)() 33 | elif cfg.cloud_provider.cloud == elastic_blast.config.CSP.GCP: 34 | raise NotImplementedError('Resource check for GCP is not implemented yet') 35 | else: 36 | raise NotImplementedError('Resource check for unknown cloud vendor') 37 | -------------------------------------------------------------------------------- /src/elastic_blast/subst.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | Module elastic_blast.subst - substitute variables of form ${VAR_NAME} or $VAR_NAME from map object 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | import re 27 | 28 | re_sub = re.compile(r'\$(?:\{([A-Za-z_][A-Za-z0-9_]*)\}|([A-Za-z_][A-Za-z0-9_]*))') 29 | def substitute_params(job_template: str, map_obj) -> str: 30 | """ Substitute variables of form ${VAR_NAME} and $VAR_NAME with 31 | actual values from map object. 
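    Example (illustrative): substitute_params('db=${ELB_DB} zone=$ELB_ZONE', {'ELB_DB': 'nt'})
    returns 'db=nt zone=$ELB_ZONE'; names missing from map_obj are left unchanged.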
32 | 33 | Params: 34 | job_template: text to substitute variables in 35 | map_obj: object with get method to use for substitutions 36 | Returns: text with substitutions 37 | """ 38 | def _subs_var(mo): 39 | v = '' 40 | if mo.group(1): 41 | v = mo.group(1) 42 | else: 43 | v = mo.group(2) 44 | return map_obj.get(v, mo.group(0)) 45 | return re_sub.sub(_subs_var, job_template) 46 | -------------------------------------------------------------------------------- /src/elastic_blast/taxonomy.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | Functions that facilitate taxonomic filtering of BLAST databases for elastic-blast searches. 23 | 24 | Author: Greg Boratyn (boratyng@ncbi.nlm.nih.gov) 25 | """ 26 | 27 | import re 28 | import logging 29 | from .filehelper import open_for_write 30 | from .constants import ELB_QUERY_BATCH_DIR, ELB_TAXIDLIST_FILE, INPUT_ERROR 31 | from .util import UserReportError 32 | from .elb_config import ElasticBlastConfig 33 | 34 | re_taxidlist_parse = re.compile(r'-(?Pnegative_)?(taxidlist)\s+(?P(\S+))') 35 | 36 | def setup_taxid_filtering(cfg: ElasticBlastConfig) -> None: 37 | """ Upload a taxid list file to results bucket under a standard name. 38 | Processes the following -taxidlist and -negative_taxidlist options in 39 | blast.options parameter. 
""" 40 | 41 | matches = re.findall(r'-(negative_)?(taxid(?:list|s))(?:\s+(\S+))?', 42 | cfg.blast.options) 43 | # nothing to do, if taxid filtering was not requested 44 | if not matches: 45 | return 46 | 47 | # report an error if more than one taxid filtering option was used 48 | if len(matches) > 1: 49 | raise UserReportError( 50 | returncode=INPUT_ERROR, 51 | message='BLAST -taxids, -taxidlist, -negative_taxids, and -negative_taxidlist options ' 52 | 'are mutually exclusive, please use only one of them') 53 | 54 | m = re_taxidlist_parse.search(cfg.blast.options) 55 | if m: 56 | local_filename = m.group('filename') 57 | filename = '/'.join([cfg.cluster.results, ELB_QUERY_BATCH_DIR, ELB_TAXIDLIST_FILE]) 58 | logging.debug(f'Uploading taxid list file {local_filename} to {filename}') 59 | with open_for_write(filename) as fout: 60 | with open(local_filename) as fin: 61 | for line in fin: 62 | fout.write(line) 63 | 64 | # update blast options 65 | cfg.blast.taxidlist = filename 66 | blast_opts = cfg.blast.options 67 | 68 | # replace user's taxidlist file with our taxidlist filename, to avoid 69 | # checks for proper cloud object names 70 | blast_opts = re_taxidlist_parse.sub(f'-\\gtaxidlist {ELB_TAXIDLIST_FILE}', blast_opts) 71 | cfg.blast.options = blast_opts 72 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/cloudformation-admin-iam.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Administrator access service role for CloudFormation 3 | Parameters: 4 | Owner: 5 | Description: User who creates/runs this application 6 | Type: String 7 | 8 | Resources: 9 | CFNAdminRole: 10 | Type: "AWS::IAM::Role" 11 | Properties: 12 | RoleName: !Join [-, ['elasticblast-admin-role', !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] 13 | AssumeRolePolicyDocument: 14 | Version: "2012-10-17" 15 | Statement: 16 | - Effect: "Allow" 17 | Principal: 18 | Service: ["cloudformation.amazonaws.com"] 19 | Action: "sts:AssumeRole" 20 | Path: "/" 21 | ManagedPolicyArns: 22 | - 'arn:aws:iam::aws:policy/AdministratorAccess' 23 | Tags: 24 | - Key: Name 25 | Value: !Join [-, [elasticblast, !Ref Owner, 'admin-role']] 26 | - Key: Project 27 | Value: BLAST 28 | - Key: Owner 29 | Value: !Ref Owner 30 | - Key: billingcode 31 | Value: elastic-blast 32 | Outputs: 33 | CFNAdminRole: 34 | Description: CloudFormation admin access service role. 35 | Value: !Ref CFNAdminRole 36 | CFNAdminRoleArn: 37 | Description: CloudFormation admin access service role ARN. 
38 | Value: !GetAtt CFNAdminRole.Arn 39 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/elb-janitor-cronjob.yaml.template: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: CronJob 3 | metadata: 4 | # This cannot exceed 52 characters 5 | name: elb-janitor 6 | labels: 7 | app: janitor 8 | spec: 9 | schedule: "${ELB_JANITOR_SCHEDULE}" 10 | jobTemplate: 11 | spec: 12 | template: 13 | metadata: 14 | labels: 15 | app: janitor 16 | spec: 17 | containers: 18 | - name: check-blast-jobs 19 | image: ${ELB_DOCKER_IMAGE} 20 | imagePullPolicy: IfNotPresent 21 | env: 22 | - name: ELB_GCP_PROJECT 23 | value: "${ELB_GCP_PROJECT}" 24 | - name: ELB_GCP_REGION 25 | value: "${ELB_GCP_REGION}" 26 | - name: ELB_GCP_ZONE 27 | value: "${ELB_GCP_ZONE}" 28 | - name: ELB_RESULTS 29 | value: "${ELB_RESULTS}" 30 | - name: ELB_CLUSTER_NAME 31 | value: "${ELB_CLUSTER_NAME}" 32 | restartPolicy: OnFailure 33 | concurrencyPolicy: Forbid 34 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/elb-janitor-rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: elb-janitor-rbac 6 | subjects: 7 | - kind: ServiceAccount 8 | name: default 9 | namespace: default 10 | roleRef: 11 | kind: ClusterRole 12 | name: cluster-admin 13 | apiGroup: rbac.authorization.k8s.io 14 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/job-cloud-split-local-ssd.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: cloud-split-ssd 6 | labels: 7 | app: setup 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: setup 13 | spec: 14 | volumes: 15 | - name: scratch 16 | emptyDir: {} 17 | containers: 18 | - name: ${K8S_JOB_IMPORT_QUERY_BATCHES} 19 | image: ${ELB_IMAGE_QS} 20 | workingDir: /blast/blastdb 21 | volumeMounts: 22 | - name: scratch 23 | mountPath: /scratch 24 | command: ["run.sh", "-i", "${INPUT_QUERY}", "-o", "${ELB_RESULTS}", "-b", "${BATCH_LEN}", "-p", "1", "-q", "/scratch"] 25 | restartPolicy: Never 26 | backoffLimit: 3 27 | activeDeadlineSeconds: ${TIMEOUT} 28 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/job-init-local-ssd.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: init-ssd-${NODE_ORDINAL} 6 | labels: 7 | app: setup 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: setup 13 | spec: 14 | volumes: 15 | - name: blastdb 16 | hostPath: 17 | path: "/mnt/disks/ssd0" 18 | containers: 19 | - name: ${K8S_JOB_GET_BLASTDB} 20 | image: ${ELB_DOCKER_IMAGE} 21 | workingDir: /blast/blastdb 22 | volumeMounts: 23 | - name: blastdb 24 | mountPath: /blast/blastdb 25 | subPath: blast 26 | env: 27 | - name: BLAST_USAGE_REPORT 28 | value: "false" 29 | command: ["/bin/bash", "-c"] 30 | args: 31 | - echo "BASH version ${BASH_VERSION}"; 32 | start=`date +%s`; 33 | sleep 30; 34 | log() { ts=`date +'%F %T'`; printf '%s RUNTIME %s %f seconds\n' "$ts" "$1" "$2"; }; 35 | if [ -z '${ELB_DB_PATH}' ]; then 36 | echo update_blastdb.pl ${ELB_DB} --decompress --source 
${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 37 | update_blastdb.pl ${ELB_DB} --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 38 | exit_code=$?; 39 | else 40 | echo gsutil -mq cp ${ELB_DB_PATH} .; 41 | gsutil -mq cp ${ELB_DB_PATH} .; 42 | exit_code=$?; 43 | [ $exit_code -eq 0 ] || exit $exit_code; 44 | [ -f ${ELB_DB}.tar.gz ] && tar xzf ${ELB_DB}.tar.gz; 45 | [ -f ${ELB_DB}.tar.gz ] && rm ${ELB_DB}.tar.gz; 46 | fi; 47 | echo update_blastdb.pl taxdb --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 48 | update_blastdb.pl taxdb --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 49 | end=`date +%s`; 50 | log "download-blastdbs" $(($end-$start)); 51 | [ $exit_code -eq 0 ] || exit $exit_code; 52 | echo blastdbcmd -info -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE}; 53 | blastdbcmd -info -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE}; 54 | exit_code=$?; 55 | [ $exit_code -eq 0 ] || exit $exit_code; 56 | echo blastdbcheck -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE} -no_isam -ends 5; 57 | blastdbcheck -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE} -no_isam -ends 5; 58 | exit_code=$?; 59 | [ $exit_code -eq 0 ] || exit $exit_code; 60 | if [ ! -z "${ELB_TAXIDLIST}" ] ; then 61 | gsutil -mq cp ${ELB_TAXIDLIST} /blast/blastdb; 62 | exit_code=$?; 63 | fi; 64 | exit $exit_code; 65 | restartPolicy: Never 66 | nodeSelector: 67 | ordinal: "${NODE_ORDINAL}" 68 | backoffLimit: 3 69 | activeDeadlineSeconds: ${TIMEOUT} 70 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/job-init-pv.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: init-pv 6 | labels: 7 | app: setup 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: setup 13 | spec: 14 | volumes: 15 | - name: blastdb 16 | persistentVolumeClaim: 17 | claimName: blast-dbs-pvc-rwo 18 | readOnly: false 19 | containers: 20 | - name: ${K8S_JOB_GET_BLASTDB} 21 | image: ${ELB_DOCKER_IMAGE} 22 | workingDir: /blast/blastdb 23 | volumeMounts: 24 | - name: blastdb 25 | mountPath: /blast/blastdb 26 | readOnly: false 27 | env: 28 | - name: BLAST_USAGE_REPORT 29 | value: "false" 30 | command: ["/bin/bash", "-c"] 31 | args: 32 | - echo "BASH version ${BASH_VERSION}"; 33 | start=`date +%s`; 34 | log() { ts=`date +'%F %T'`; printf '%s RUNTIME %s %f seconds\n' "$ts" "$1" "$2"; }; 35 | if [ -z '${ELB_DB_PATH}' ]; then 36 | echo update_blastdb.pl ${ELB_DB} --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 37 | update_blastdb.pl ${ELB_DB} --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 38 | exit_code=$?; 39 | [ $exit_code -eq 0 ] || exit $exit_code; 40 | else 41 | echo gsutil -mq cp ${ELB_DB_PATH} .; 42 | gsutil -mq cp ${ELB_DB_PATH} .; 43 | exit_code=$?; 44 | [ $exit_code -eq 0 ] || exit $exit_code; 45 | [ -f ${ELB_DB}.tar.gz ] && tar xzf ${ELB_DB}.tar.gz; 46 | [ -f ${ELB_DB}.tar.gz ] && rm ${ELB_DB}.tar.gz; 47 | fi; 48 | echo update_blastdb.pl taxdb --decompress --source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 49 | update_blastdb.pl taxdb --decompress 
--source ${ELB_BLASTDB_SRC} --verbose --verbose --verbose --verbose --verbose --verbose ${GCP_PROJECT_OPT}; 50 | end=`date +%s`; 51 | log "download-blastdbs" $(($end-$start)); 52 | [ $exit_code -eq 0 ] || exit $exit_code; 53 | echo blastdbcmd -info -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE}; 54 | blastdbcmd -info -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE}; 55 | exit_code=$?; 56 | [ $exit_code -eq 0 ] || exit $exit_code; 57 | echo blastdbcheck -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE} -no_isam -ends 5; 58 | blastdbcheck -db ${ELB_DB} -dbtype ${ELB_DB_MOL_TYPE} -no_isam -ends 5; 59 | exit_code=$?; 60 | [ $exit_code -eq 0 ] || exit $exit_code; 61 | if [ ! -z "${ELB_TAXIDLIST}" ] ; then 62 | gsutil -mq cp ${ELB_TAXIDLIST} /blast/blastdb; 63 | exit_code=$?; 64 | fi; 65 | exit $exit_code; 66 | - name: ${K8S_JOB_IMPORT_QUERY_BATCHES} 67 | image: ${ELB_IMAGE_QS} 68 | workingDir: /blast/queries 69 | volumeMounts: 70 | - name: blastdb 71 | mountPath: /blast/queries 72 | readOnly: false 73 | command: ["run.sh", "-i", "${INPUT_QUERY}", "-o", "${ELB_RESULTS}", "-b", "${BATCH_LEN}", "-c", "${COPY_ONLY}", "-q", "/blast/queries/"] 74 | restartPolicy: Never 75 | backoffLimit: 9 76 | activeDeadlineSeconds: ${TIMEOUT} 77 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/job-submit-jobs.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: submit-jobs 6 | labels: 7 | app: submit 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: submit 13 | spec: 14 | volumes: 15 | - name: workspace 16 | emptyDir: {} 17 | containers: 18 | - name: ${K8S_JOB_SUBMIT_JOBS} 19 | image: ${ELB_DOCKER_IMAGE} 20 | imagePullPolicy: IfNotPresent 21 | env: 22 | - name: ELB_GCP_PROJECT 23 | value: "${ELB_GCP_PROJECT}" 24 | - name: ELB_GCP_ZONE 25 | value: "${ELB_GCP_ZONE}" 26 | - name: ELB_RESULTS 27 | value: "${ELB_RESULTS}" 28 | - name: ELB_CLUSTER_NAME 29 | value: "${ELB_CLUSTER_NAME}" 30 | - name: ELB_NUM_NODES 31 | value: "${ELB_NUM_NODES}" 32 | - name: ELB_PD_SIZE 33 | value: "${ELB_PD_SIZE}" 34 | - name: ELB_LABELS 35 | value: "${ELB_LABELS}" 36 | - name: ELB_USE_LOCAL_SSD 37 | value: "${ELB_USE_LOCAL_SSD}" 38 | # - name: ELB_DEBUG_SUBMIT_JOB_FAIL 39 | # value: "1" 40 | workingDir: /workspace 41 | volumeMounts: 42 | - name: workspace 43 | mountPath: /workspace 44 | restartPolicy: Never 45 | backoffLimit: 0 46 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/pvc-rom.yaml.template: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: blast-dbs-pvc 5 | spec: 6 | dataSource: 7 | name: blast-dbs-snapshot 8 | kind: VolumeSnapshot 9 | apiGroup: snapshot.storage.k8s.io 10 | storageClassName: gcp-pd-ssd 11 | accessModes: 12 | - ReadOnlyMany 13 | resources: 14 | requests: 15 | storage: ${ELB_PD_SIZE} 16 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/pvc-rwo.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: blast-dbs-pvc-rwo 6 | spec: 7 | storageClassName: "gcp-pd-ssd" 8 | accessModes: 9 | - ReadWriteOnce 10 | resources: 11 | requests: 12 | storage: ${ELB_PD_SIZE} 13 | 
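How the ${...} placeholders in the templates above are filled can be sketched with
elastic_blast.subst.substitute_params (hypothetical driver code; the package's own call
sites are not shown in this excerpt):

    from elastic_blast.subst import substitute_params

    with open('src/elastic_blast/templates/pvc-rwo.yaml.template') as f:
        manifest = substitute_params(f.read(), {'ELB_PD_SIZE': '3000G'})
    # manifest now defines a 3000G ReadWriteOnce PersistentVolumeClaim named blast-dbs-pvc-rwo
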
-------------------------------------------------------------------------------- /src/elastic_blast/templates/storage-gcp-ssd.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: storage.k8s.io/v1 3 | kind: StorageClass 4 | metadata: 5 | name: gcp-pd-ssd 6 | provisioner: pd.csi.storage.gke.io 7 | parameters: 8 | type: pd-ssd 9 | csi.storage.k8s.io/fstype: ext4 10 | replication-type: none 11 | reclaimPolicy: Delete 12 | allowVolumeExpansion: true 13 | volumeBindingMode: WaitForFirstConsumer 14 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/storage-gcp.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: storage.k8s.io/v1 3 | kind: StorageClass 4 | metadata: 5 | name: gcp-pd 6 | provisioner: pd.csi.storage.gke.io 7 | parameters: 8 | type: pd-standard 9 | csi.storage.k8s.io/fstype: ext4 10 | replication-type: none 11 | reclaimPolicy: Delete 12 | allowVolumeExpansion: true 13 | volumeBindingMode: WaitForFirstConsumer 14 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/volume-snapshot-class.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: snapshot.storage.k8s.io/v1 2 | kind: VolumeSnapshotClass 3 | metadata: 4 | name: blast-dbs-snapshotclass 5 | driver: pd.csi.storage.gke.io 6 | deletionPolicy: Delete 7 | -------------------------------------------------------------------------------- /src/elastic_blast/templates/volume-snapshot.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: snapshot.storage.k8s.io/v1 2 | kind: VolumeSnapshot 3 | metadata: 4 | name: blast-dbs-snapshot 5 | spec: 6 | volumeSnapshotClassName: blast-dbs-snapshotclass 7 | source: 8 | persistentVolumeClaimName: blast-dbs-pvc-rwo 9 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/__init__.py -------------------------------------------------------------------------------- /tests/app/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/app/data/bad_bucket_conf.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 32 11 | num-cpus = 15 12 | pd-size = 1000G 13 | #use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | results = gs://does-not-exist 18 | queries = gs://test-bucket/test-query.fa 19 | db = nt 20 | mem-request = 64G 21 | mem-limit = 80G 22 | batch-len = 625000 23 | -------------------------------------------------------------------------------- /tests/app/data/blastdb-notfound.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | gcp-project = a-gcp-project 3 | gcp-region = us-east4 4 | gcp-zone = us-east4-b 5 | 6 | [cluster] 7 | name = pytest-elastic-blast-no-blastdb 8 | 
machine-type = n1-standard-32 9 | num-cpus = 30 10 | num-nodes = 10 11 | pd-size = 3000G 12 | 13 | [blast] 14 | results = gs://test-results 15 | program = blastx 16 | db = some-non-exsitent-database 17 | batch-len = 10000 18 | options = -task blastx-fast 19 | queries = gs://test-bucket/test-query.fa 20 | -------------------------------------------------------------------------------- /tests/app/data/cleanup-error.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 2 11 | num-cpus = 4 12 | #pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | results = gs://elasticblast-tomcat/pytest/app/cleanup-error 17 | program = blastn 18 | db = nt 19 | mem-request = 500M 20 | mem-limit = 1.0G 21 | queries = gs://elastic-blast-samples/queries/small/e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa 22 | batch-len = 30000 23 | -------------------------------------------------------------------------------- /tests/app/data/cluster-error.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-highmem-32 10 | num-nodes = 2 11 | num-cpus = 4 12 | #pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | results = gs://elasticblast-tomcat/pytest/app/cluster-error 17 | program = blastn 18 | db = nt 19 | mem-request = 500M 20 | mem-limit = 1.0G 21 | queries = gs://elastic-blast-samples/queries/small/e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa 22 | batch-len = 30000 23 | -------------------------------------------------------------------------------- /tests/app/data/good_conf.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 32 11 | num-cpus = 15 12 | pd-size = 1000G 13 | #use-preemptible = 1 14 | 15 | [blast] 16 | results = gs://elasticblast-tomcat/pytest/app/good-conf 17 | program = blastn 18 | db = testdb 19 | mem-request = 64G 20 | mem-limit = 80G 21 | batch-len = 625000 22 | queries = test-queries.fa 23 | -------------------------------------------------------------------------------- /tests/app/data/incomplete-mem-limit-optimal-aws-machine-type.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | # N.B.: On AWS for optimal machine-type: mem-limit MUST be provided 4 | 5 | [cloud-provider] 6 | aws-region = us-east-1 7 | 8 | [cluster] 9 | machine-type = optimal 10 | num-nodes = 1 11 | num-cpus = 4 12 | disk-type = gp2 13 | # minimal size to trigger faster IO speed for gp2 14 | pd-size = 334G 15 | use-preemptible = true 16 | 17 | [blast] 18 | program = blastn 19 | db = testdb 20 | mem-request = 500M 21 | queries = s3://test-bucket/test-query.fa 22 | results = s3://test-results 23 | -------------------------------------------------------------------------------- 
/tests/app/data/invalid-blast-opt-no-closing-quote.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against pdbnt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | 6 | [cluster] 7 | machine-type = m5.xlarge 8 | num-nodes = 1 9 | num-cpus = 4 10 | use-preemptible = true 11 | 12 | [blast] 13 | options = -outfmt 7" 14 | mem-limit = 1G 15 | program = blastn 16 | db = s3://some-bucket/mydb 17 | mem-request = 500M 18 | queries = s3://test-bucket/test-query.fa 19 | results = s3://test-results 20 | -------------------------------------------------------------------------------- /tests/app/data/invalid-cpu-req-gcp.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 16 12 | pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | queries = gs://test-bucket/test-query.fa 18 | db = testdb 19 | results = gs://test-bucket/invalid-cpu 20 | -------------------------------------------------------------------------------- /tests/app/data/invalid-dollar-sign-char.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = ${ELB_GCP_PROJECT} 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 16 12 | pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | queries = gs://test-bucket-${USER}/test-query.fa 18 | db = testdb 19 | results = gs://test-bucket/invalid-cpu 20 | -------------------------------------------------------------------------------- /tests/app/data/invalid-machine-type-aws.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | 6 | [cluster] 7 | machine-type = dummy-machine-type-does-not-exist 8 | num-nodes = 32 9 | num-cpus = 15 10 | pd-size = 1000G 11 | #use-preemptible = 1 12 | 13 | [blast] 14 | results = s3://test-results 15 | program = blastn 16 | queries = s3://test-bucket/test-query.fa 17 | db = s3://bucket/some-db 18 | mem-request = 64G 19 | mem-limit = 80G 20 | batch-len = 625000 21 | -------------------------------------------------------------------------------- /tests/app/data/invalid-machine-type-gcp.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = dummy-machine-type-does-not-exist 10 | num-nodes = 32 11 | num-cpus = 15 12 | pd-size = 1000G 13 | #use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | queries = gs://test-bucket/test-query.fa 18 | db = testdb 19 | mem-request = 64G 20 | mem-limit = 80G 21 | batch-len = 625000 22 | results = gs://test-results 23 | -------------------------------------------------------------------------------- /tests/app/data/invalid-mem-req.ini: 
-------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | gcp-project = a-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 32 11 | num-cpus = 15 12 | pd-size = 1000G 13 | #use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | queries = gs://test-bucket/test-query.fa 18 | db = nt 19 | mem-request = 0G 20 | mem-limit = 0G 21 | batch-len = 625000 22 | results = gs://test-results 23 | -------------------------------------------------------------------------------- /tests/app/data/no-num-cpus-optimal-aws-machine-type.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | # N.B.: On AWS for optimal machine-type: mem-limit MUST be provided 4 | 5 | [cloud-provider] 6 | aws-region = us-east-1 7 | 8 | [cluster] 9 | machine-type = optimal 10 | disk-type = gp2 11 | # minimal size to trigger faster IO speed for gp2 12 | pd-size = 334G 13 | use-preemptible = true 14 | 15 | [blast] 16 | program = blastn 17 | db = testdb 18 | mem-limit = 500M 19 | queries = s3://test-bucket/test-query.fa 20 | results = s3://test-results 21 | -------------------------------------------------------------------------------- /tests/app/data/query.fa: -------------------------------------------------------------------------------- 1 | >test query 2 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 3 | 4 | -------------------------------------------------------------------------------- /tests/app/data/too-many-k8s-jobs.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against pdbnt that 2 | # will lead to too many k8s jobs being created (i.e.: should fail) 3 | 4 | [cloud-provider] 5 | gcp-project = a-gcp-project 6 | gcp-region = us-east4 7 | gcp-zone = us-east4-b 8 | 9 | [cluster] 10 | machine-type = n1-standard-16 11 | num-nodes = 1 12 | pd-size = 500G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | results = gs://test-results 17 | program = blastn 18 | db = testdb 19 | mem-request = 500M 20 | mem-limit = 1.0G 21 | queries = gs://test-bucket/long-query.fa 22 | batch-len = 1000 23 | -------------------------------------------------------------------------------- /tests/app/elastic_blast_app.py: -------------------------------------------------------------------------------- 1 | ../../bin/elastic-blast -------------------------------------------------------------------------------- /tests/app/gcloud: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is a fake gcloud that does nothing and always succeeds. 4 | # It is needed for test_dependency_error unit test. 
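# Note (assumption, not stated in the repository): the app tests presumably put this
# directory ahead of the real gcloud on PATH, e.g. PATH=$PWD/tests/app:$PATH pytest tests/app,
# so dependency checks see a gcloud that always exits 0.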
5 | 6 | exit 0 7 | -------------------------------------------------------------------------------- /tests/app/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/app 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/aws/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/aws/data/aws-no-security-group.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | aws-key-pair = workers-key 6 | aws-subnet = subnet-a43744d3 7 | aws-job-role = arn:aws:iam::250813660784:role/cbb-research-db-batch-role 8 | 9 | [cluster] 10 | machine-type = m5.8xlarge 11 | num-nodes = 1 12 | num-cpus = 4 13 | 14 | [blast] 15 | program = blastn 16 | db = pdbnt 17 | mem-request = 500M 18 | mem-limit = 1.0G 19 | queries = s3://elasticblast-test/testdata/hepa_batch_016.gz 20 | batch-len = 5000000 21 | -------------------------------------------------------------------------------- /tests/aws/data/aws-no-subnets.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | aws-key-pair = workers-key 6 | aws-job-role = arn:aws:iam::250813660784:role/cbb-research-db-batch-role 7 | 8 | [cluster] 9 | machine-type = m5.8xlarge 10 | num-nodes = 1 11 | num-cpus = 4 12 | 13 | [blast] 14 | program = blastn 15 | db = pdbnt 16 | mem-request = 500M 17 | mem-limit = 1.0G 18 | queries = s3://elasticblast-test/testdata/hepa_batch_016.gz 19 | batch-len = 5000000 20 | -------------------------------------------------------------------------------- /tests/aws/data/aws-wrong-custom-db.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | aws-region = us-east-1 3 | 4 | [cluster] 5 | machine-type = m5.large 6 | num-cpus = 2 7 | pd-size = 1G 8 | num-nodes = 1 9 | 10 | [blast] 11 | results = s3://elasticblast-test 12 | program = blastn 13 | db = s3://elasticblast-test/db/this-database-does-not-exist 14 | queries = query.fa 15 | -------------------------------------------------------------------------------- /tests/aws/data/aws-wrong-provider-custom-db.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | aws-region = us-east-1 3 | 4 | [cluster] 5 | machine-type = m5.large 6 | num-cpus = 2 7 | pd-size = 1G 8 | num-nodes = 1 9 | 10 | [blast] 11 | results = s3://elasticblast-test 12 | program = blastn 13 | # NOTE: the data lives in GCP, not AWS 14 | db = gs://elasticblast-test/db/pdbnt 15 | queries = query.fa 16 | -------------------------------------------------------------------------------- /tests/aws/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/aws 4 | # See 
https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/aws_traits/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/aws_traits/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/aws_traits 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined 5 | -------------------------------------------------------------------------------- /tests/base/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/base/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/elb_config 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/blastdb/testdb.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/blastdb/testdb.pdb -------------------------------------------------------------------------------- /tests/blastdb/testdb.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/blastdb/testdb.phr -------------------------------------------------------------------------------- /tests/blastdb/testdb.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/blastdb/testdb.pin -------------------------------------------------------------------------------- /tests/blastdb/testdb.pog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/blastdb/testdb.pog -------------------------------------------------------------------------------- /tests/blastdb/testdb.pos: -------------------------------------------------------------------------------- 1 |  Q5R654.1Q28RX4.1Q7L2H7.1 -------------------------------------------------------------------------------- /tests/blastdb/testdb.pot: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /tests/blastdb/testdb.psq: -------------------------------------------------------------------------------- 1 |  2 |    3 |  4 | 5 |        6 |  7 |         8 |  9 |  10 |      11 |   12 |  13 |     14 |        15 |  16 |  17 |    18 |   19 |       20 |    21 | 22 |  23 |       24 |    25 |            26 |  27 |   28 |  29 |    30 |   31 |   32 |  33 |    34 |     35 |  36 |       37 |  38 |    39 |    40 |  41 |       42 | 
 43 |     44 |    45 | 46 |     47 | 48 |  49 |      50 |     51 |  52 |      53 |  54 |  55 |      56 |     57 |       58 |  59 |  60 |  61 |    62 |  63 |  64 |    -------------------------------------------------------------------------------- /tests/blastdb/testdb.ptf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/blastdb/testdb.ptf -------------------------------------------------------------------------------- /tests/blastdb/testdb.pto: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /tests/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/config/__init__.py -------------------------------------------------------------------------------- /tests/config/data/aws-defaults-cluster-name.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | aws-region = us-east-1 3 | 4 | [cluster] 5 | name = arbitrary-cluster-name-to-be-ignored 6 | 7 | #[blast] 8 | #results = s3://user-bucket 9 | -------------------------------------------------------------------------------- /tests/config/data/aws-defaults.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | aws-region = us-east-1 3 | 4 | [cluster] 5 | machine-type = m5.8xlarge 6 | 7 | [blast] 8 | program = blastn 9 | db = testdb 10 | queries = test-queries 11 | results = s3://user-bucket 12 | -------------------------------------------------------------------------------- /tests/config/data/correct-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | gcp-network = custom-vpc 8 | gcp-subnetwork = custom 9 | 10 | [cluster] 11 | machine-type = n1-standard-32 12 | num-nodes = 1 13 | num-cpus = 5 14 | pd-size = 1000G 15 | use-preemptible = yes 16 | 17 | [blast] 18 | program = blastp 19 | db = testdb 20 | mem-request = 500M 21 | mem-limit = 1G 22 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 23 | results = gs://my-test-bucket 24 | -------------------------------------------------------------------------------- /tests/config/data/corrupt-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample corrupt ElasticBLAST configuration file 2 | 3 | import invalid 4 | # this file should have been a python script ;) 5 | -------------------------------------------------------------------------------- /tests/config/data/elb-aws-blastn-pdbnt.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file template to run blastn against nt 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | aws-key-pair = workers-key 6 | aws-subnet = subnet-a43744d3 7 | aws-security-group = sg-6cae8d08 8 | aws-job-role = arn:aws:iam::250813660784:role/cbb-research-db-batch-role 9 | 10 | [cluster] 11 | machine-type = m5.8xlarge 12 | num-nodes = 1 13 | num-cpus = 4 14 | 15 | [blast] 16 | results = s3://user-bucket 17 | program = blastn 18 | 
db = testdb 19 | mem-request = 500M 20 | mem-limit = 1.0G 21 | queries = gs://elastic-blast-samples/queries/MANE/MANE.GRCh38.v0.8.select_refseq_rna.fna 22 | batch-len = 5000000 23 | -------------------------------------------------------------------------------- /tests/config/data/gcp-defaults.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | gcp-project = some-project 3 | gcp-region = test-gcp-region 4 | gcp-zone = some-zone 5 | 6 | [blast] 7 | program = blastn 8 | db = gs://some-bucket/some-db 9 | queries = test-queries 10 | results = gs://user-bucket 11 | -------------------------------------------------------------------------------- /tests/config/data/incomplete-gcp-vpc-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | gcp-subnetwork = custom 8 | 9 | [cluster] 10 | machine-type = n1-standard-32 11 | num-nodes = 1 12 | num-cpus = 5 13 | pd-size = 1000G 14 | use-preemptible = yes 15 | 16 | [blast] 17 | program = blastp 18 | db = swissprot 19 | mem-request = 500M 20 | mem-limit = 1G 21 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 22 | results = gs://my-test-bucket 23 | -------------------------------------------------------------------------------- /tests/config/data/instance-too-small-aws.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file with an instance that is too small to run ElasticBLAST 2 | 3 | [cloud-provider] 4 | aws-region = us-east-1 5 | 6 | # These machine types were obtained via the command below 7 | # ec2-instance-selector -g 0 -a x86_64 --memory-max 2 --output table-wide 8 | [cluster] 9 | machine-type = c1.medium 10 | #machine-type = m1.small 11 | #machine-type = t1.micro 12 | #machine-type = t2.micro 13 | #machine-type = t2.nano 14 | #machine-type = t2.small 15 | #machine-type = t3.micro 16 | #machine-type = t3.nano 17 | #machine-type = t3.small 18 | #machine-type = t3a.micro 19 | #machine-type = t3a.nano 20 | #machine-type = t3a.small 21 | num-nodes = 10 22 | 23 | [blast] 24 | program = blastp 25 | db = testdb 26 | queries = s3://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 27 | results = s3://my-test-bucket 28 | -------------------------------------------------------------------------------- /tests/config/data/instance-too-small-gcp.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file with an instance that is too small to run ElasticBLAST 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-highcpu-2 10 | num-nodes = 10 11 | 12 | [blast] 13 | program = blastp 14 | db = testdb 15 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 16 | results = gs://my-test-bucket 17 | -------------------------------------------------------------------------------- /tests/config/data/invalid-parameters.ini: -------------------------------------------------------------------------------- 1 | # Sample invalid ElasticBLAST configuration file 2 | 3 | [cloud-provider] 4 | gcp-project = dummy 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | # 
invalid value 11 | num-nodes = -1 12 | num-cpus = 5 13 | # N.B.: it's OK to leave this empty, it'll be automatically configured based on database choice 14 | pd-size = 15 | use-preemptible = no 16 | 17 | [blast] 18 | # invalid value 19 | program = the-blastp 20 | db = swissprot 21 | mem-request = 500M 22 | mem-limit = 1G 23 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 24 | batch-len = 100000 25 | options = -outfmt 6 -num_threads 32 26 | results = gs://my-test-bucket 27 | -------------------------------------------------------------------------------- /tests/config/data/mem-limit-too-high.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file with memory limit too high 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 10 11 | 12 | [blast] 13 | program = blastp 14 | db = testdb 15 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 16 | results = gs://my-test-bucket 17 | mem-limit = 130G 18 | -------------------------------------------------------------------------------- /tests/config/data/minimal-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample minimal ElasticBLAST configuration file: all missing parameters are auto-configured 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | #machine-type = n1-standard-32 10 | num-nodes = 10 11 | #num-cpus = 5 12 | #pd-size = 1000G 13 | #use-preemptible = yes 14 | 15 | [blast] 16 | program = blastp 17 | db = swissprot 18 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 19 | results = gs://my-test-bucket 20 | -------------------------------------------------------------------------------- /tests/config/data/missing-required-parameters.ini: -------------------------------------------------------------------------------- 1 | # Sample invalid ElasticBLAST configuration file 2 | 3 | [cloud-provider] 4 | # This must be provided 5 | gcp-project = 6 | gcp-region = us-east4 7 | gcp-zone = us-east4-b 8 | 9 | [cluster] 10 | machine-type = n1-standard-32 11 | num-nodes = 1 12 | num-cpus = 5 13 | pd-size = 1000G 14 | use-preemptible = on 15 | 16 | [blast] 17 | program = blastp 18 | # Missing BLASTDB is an error 19 | db = 20 | mem-request = 500M 21 | mem-limit = 1G 22 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 23 | batch-len = 100000 24 | # Missing results is an error 25 | -------------------------------------------------------------------------------- /tests/config/data/multiple-query-files.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | aws-region = us-east-1 3 | 4 | [blast] 5 | program = blastn 6 | db = testdb 7 | queries = query-file-1 query-file-2 8 | results = s3://my-results 9 | -------------------------------------------------------------------------------- /tests/config/data/optional-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample minimal ElasticBLAST configuration file: all missing parameters are auto-configured 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | #machine-type = n1-standard-32 10 
| num-nodes = 10 11 | #num-cpus = 5 12 | #pd-size = 1000G 13 | #use-preemptible = yes 14 | 15 | [blast] 16 | program = blastp 17 | db = swissprot 18 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 19 | results = gs://my-test-bucket 20 | options = -task blastp-fast 21 | -------------------------------------------------------------------------------- /tests/config/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/cost/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/cost/__init__.py -------------------------------------------------------------------------------- /tests/cost/data/aws-run-summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "clusterInfo": { 4 | "provider": "AWS", 5 | "numMachines": 10, 6 | "numVCPUsPerMachine": 32, 7 | "RamPerMachine": 120, 8 | "machineType": "m5.8xlarge", 9 | "region": "us-east-1", 10 | "zone": "us-east-1b", 11 | "storageType": "persistentDisk" 12 | }, 13 | "runtime": { 14 | "wallClock": 54000, 15 | "blastdbSetup": { 16 | "startTime": 2147483647, 17 | "endTime": 2147489647 18 | }, 19 | "blast": { 20 | "startTime": 2147489747, 21 | "endTime": 2147491747 22 | }, 23 | "blastData": { 24 | "queryLength": 53353969, 25 | "databaseLength": 399515 26 | }, 27 | "lettersPerSecondPerCpu": 0, 28 | "exitCode": 0 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tests/cost/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/db_metadata/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/db_metadata/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/db_metadata 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined 5 | -------------------------------------------------------------------------------- /tests/elb_config/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/elb_config/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite 
as:
# pytest tests/elb_config
# See https://docs.pytest.org/en/latest/customize.html for description how test root is determined
--------------------------------------------------------------------------------
/tests/fasta_split/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/fasta_split/__init__.py
--------------------------------------------------------------------------------
/tests/fasta_split/job-batch.yaml.template:
--------------------------------------------------------------------------------
apiVersion: batch/v1
kind: Job
metadata:
  name: job-${QUERY_NUM}
spec:
  template:
    spec:
      containers:
      - name: blast
        image: us.gcr.io/ncbi-sandbox-blast/elastic-blast
        command: ["blastn", "-db", "nt", "-query", "${QUERY_PATH}/${QUERY}", "-out", "${RESULTS}", "-outfmt", "11"]
      restartPolicy: OnFailure
  backoffLimit: 1
--------------------------------------------------------------------------------
/tests/fasta_split/performance-test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# performance-test.sh: Time fasta_split.py on a large protein query set and
# verify that it produces the expected number of query batches and job files
#
# Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov)
# Created: Mon 09 Mar 2020 09:18:03 AM EDT

TEST_FILE=gs://elastic-blast-samples/queries/protein/dark-matter-1M.faa.gz
SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
TEST_BATCH_LENGTH=10000
TEST_NBATCHES=19920

#export PATH=/bin:/usr/bin:/opt/python-all/bin
shopt -s nullglob
set -xeuo pipefail

created=$(date -Iseconds | tr : - | tr "[:upper:]" "[:lower:]")
test_bucket=gs://${USER}-test/performance-test-elastic-blast-fasta-split-script-$created-batches

manifest=`mktemp -t $(basename -s .sh $0)-for-elastic-blast-fasta-split-script-XXXXXXX`
jobs=`mktemp -d`
exit_code=0

cleanup() {
    # find . -type f -name "*.yaml" -delete
    rm -rf ${jobs}
    rm -f ${manifest}
    set +e
    gsutil -qm rm -r ${test_bucket}
    exit $exit_code
}

timeout=${1:-"2m"}
trap "cleanup" INT QUIT EXIT HUP KILL ALRM

find $SCRIPT_DIR -type f -name "*.yaml" -delete
rm -f ${manifest}
time timeout $timeout fasta_split.py ${TEST_FILE} -l ${TEST_BATCH_LENGTH} -o ${test_bucket} -j ${jobs} -m ${manifest} -t $SCRIPT_DIR/../../src/elastic_blast/templates/blast-batch-job.yaml.template
exit_code=$?
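# The count checks below compare the number of job YAML files written to ${jobs}
# and the number of lines in ${manifest} against TEST_NBATCHES, the batch count
# expected for this query set at TEST_BATCH_LENGTH; a mismatch adds 10 or 20 to
# the fasta_split.py exit code so the two failure modes stay distinguishable.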
39 | # timeout code is either 124 if SIGTERM was sent, 128+9 if SIGKILL, or return code of the program if timeout did not happen 40 | # fasta_split uses 1 to 7 codes to report various errors, so we combine codes to convey this to the user 41 | [ ${TEST_NBATCHES} -eq `find ${jobs} -name "*.yaml" -type f | wc -l` ] || { echo "Mismatch number of generated job files"; exit_code=$((10+exit_code)); } 42 | [ ${TEST_NBATCHES} -eq `wc -l ${manifest} | cut -f 1 -d ' '` ] || { echo "Mismatch number of generated job files"; exit_code=$((20+exit_code)); } 43 | -------------------------------------------------------------------------------- /tests/fasta_split/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/fasta_split/testdata/actually_gzipped_fasta.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/fasta_split/testdata/actually_gzipped_fasta.fa -------------------------------------------------------------------------------- /tests/fasta_split/testdata/e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/fasta_split/testdata/e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa.gz -------------------------------------------------------------------------------- /tests/fasta_split/testdata/empty_file.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/fasta_split/testdata/empty_file.fa -------------------------------------------------------------------------------- /tests/fasta_split/testdata/fasta.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/fasta_split/testdata/fasta.fa.gz -------------------------------------------------------------------------------- /tests/fasta_split/testdata/test_template: -------------------------------------------------------------------------------- 1 | ${QUERY_NUM}$QUERY_NUM 2 | ${QUERY_PATH} 3 | ${QUERY} 4 | ${RESULTS} 5 | ${SOME_NON_EXISTING_VARIABLE} 6 | ${SOME_EXTRA_SUBSTITUTION_VARIABLE_WE_PASS_AS_ARGUMENT} -------------------------------------------------------------------------------- /tests/filehelper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/filehelper/__init__.py -------------------------------------------------------------------------------- /tests/filehelper/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/filehelper 4 | # See https://docs.pytest.org/en/latest/customize.html 
for description how test root is determined -------------------------------------------------------------------------------- /tests/filehelper/test_filesystem_checks.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | Unit tests for filehelper module 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | import pytest 28 | import os 29 | from elastic_blast import filehelper 30 | from tempfile import TemporaryDirectory 31 | import pytest 32 | from tests.utils import gke_mock, NOT_WRITABLE_BUCKET 33 | 34 | TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') 35 | WRITEABLE_BUCKET = 'gs://test-bucket' 36 | 37 | def test_check_for_read_success(gke_mock): 38 | filehelper.check_for_read('gs://blast-db/latest-dir') 39 | filehelper.check_for_read('s3://ncbi-blast-databases/latest-dir') 40 | filehelper.check_for_read(os.path.join(TEST_DATA_DIR, 'test.tar')) 41 | 42 | 43 | def test_check_for_read_failure(gke_mock): 44 | with pytest.raises(FileNotFoundError): 45 | filehelper.check_for_read('gs://blast-db/non-existent-file') 46 | with pytest.raises(FileNotFoundError): 47 | filehelper.check_for_read(os.path.join(TEST_DATA_DIR, 'non-existent-file')) 48 | with pytest.raises(FileNotFoundError): 49 | filehelper.check_for_read('https://storage.googleapis.com/blast-db/invalid-file') 50 | 51 | 52 | def test_check_for_write_success(gke_mock): 53 | filehelper.check_dir_for_write(WRITEABLE_BUCKET) 54 | with TemporaryDirectory() as d: 55 | filehelper.check_dir_for_write(d) 56 | 57 | 58 | def test_check_for_write_failure(gke_mock): 59 | with pytest.raises(PermissionError): 60 | filehelper.check_dir_for_write(f'gs://{NOT_WRITABLE_BUCKET}') 61 | with pytest.raises(PermissionError): 62 | filehelper.check_dir_for_write('/home/') 63 | -------------------------------------------------------------------------------- /tests/filehelper/test_local_access.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 
9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | Unit tests for filehelper module 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | import os, pytest 28 | from elastic_blast import filehelper 29 | 30 | TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') 31 | 32 | expected = """\ 33 | Test file 1 34 | Another line 35 | Test file 2 36 | Waxing poetic 37 | """ 38 | 39 | 40 | def test_tar_merge_read(): 41 | with filehelper.open_for_read(os.path.join(TEST_DATA_DIR, 'test.tar')) as f: 42 | contents = f.read() 43 | assert(contents == expected) 44 | 45 | -------------------------------------------------------------------------------- /tests/gcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/gcp/__init__.py -------------------------------------------------------------------------------- /tests/gcp/data/test-cfg-file.ini: -------------------------------------------------------------------------------- 1 | # Sample minimal ElasticBLAST configuration file: all missing parameters are auto-configured 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | name = elastic-blast 10 | machine-type = n1-standard-1 11 | use-preemptible = 1 12 | num-nodes = 1 13 | #enable-autoscaling = 1 14 | 15 | [blast] 16 | program = blastp 17 | db = swissprot 18 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 19 | results = gs://my-test-bucket 20 | -------------------------------------------------------------------------------- /tests/gcp/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/gcp 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/gcp/test_cluster_api.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 
# Government do not and cannot warrant the performance or results that
# may be obtained by using this software or data. The NLM and the U.S.
# Government disclaim all warranties, express or implied, including
# warranties of performance, merchantability or fitness for any particular
# purpose.
#
# Please cite NCBI in any work or product based on this material.

from elastic_blast.gcp import check_cluster, start_cluster, delete_cluster
# NB: assumes elastic_blast.constants exports the config section/key names used below
from elastic_blast.constants import CFG_CLUSTER, CFG_CLUSTER_NAME
import time
import os
import pytest
import configparser

TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')


@pytest.fixture(scope="module")
def get_cluster_name():
    str_current_time = str(int(time.time()))
    uniq_cluster_name = "pytest-"+str_current_time
    if 'USER' in os.environ:
        uniq_cluster_name = uniq_cluster_name + "-" + os.environ['USER']
    else:
        uniq_cluster_name = uniq_cluster_name + "-" + str(os.getpid())

    if 'HOST' in os.environ:
        uniq_cluster_name = uniq_cluster_name + "-" + os.environ['HOST']
    else:
        uniq_cluster_name = uniq_cluster_name + "-" + str(os.getpid())

    return uniq_cluster_name

# FIXME: https://jira.ncbi.nlm.nih.gov/browse/EB-217


@pytest.mark.skip(reason="The logic in these tests assumes ordering of tests, leaks resources")
def test_start_cluster(get_cluster_name):
    cluster_name = get_cluster_name
    cfg = configparser.ConfigParser()
    cfg.read(f"{TEST_DATA_DIR}/test-cfg-file.ini")
    # override name to allow simultaneous runs
    cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = cluster_name
    created_name = start_cluster(cfg)
    assert cluster_name == created_name


@pytest.mark.skip(reason="The logic in these tests assumes ordering of tests, leaks resources")
def test_cluster_presense(get_cluster_name):
    cluster_name = get_cluster_name
    cfg = configparser.ConfigParser()
    cfg.read(f"{TEST_DATA_DIR}/test-cfg-file.ini")
    cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = cluster_name
    status = check_cluster(cfg)
    assert status == 'RUNNING'


@pytest.mark.skip(reason="The logic in these tests assumes ordering of tests, leaks resources")
def test_delete_cluster(get_cluster_name):
    cluster_name = get_cluster_name
    cfg = configparser.ConfigParser()
    cfg.read(f"{TEST_DATA_DIR}/test-cfg-file.ini")
    cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = cluster_name
    deleted_name = delete_cluster(cfg)
    assert deleted_name == cluster_name


@pytest.mark.skip(reason="The logic in these tests assumes ordering of tests, leaks resources")
def test_cluster_deletion(get_cluster_name):
    cluster_name = get_cluster_name
    cfg = configparser.ConfigParser()
    cfg.read(f"{TEST_DATA_DIR}/test-cfg-file.ini")
    cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = cluster_name
    status = check_cluster(cfg)
    assert status == ''
--------------------------------------------------------------------------------
/tests/gcp_traits/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/gcp_traits/__init__.py
--------------------------------------------------------------------------------
/tests/gcp_traits/pytest.ini:
--------------------------------------------------------------------------------
# This file is here to provide selective pytest in presence of
tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/gcp_traits/test_gcp_traits.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | Test for elastic_blast.gcp_traits 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | from elastic_blast.gcp_traits import get_machine_properties 27 | from elastic_blast.base import InstanceProperties 28 | import pytest 29 | 30 | def test_ram(): 31 | assert get_machine_properties('n1-standard-32') == InstanceProperties(32, 120) 32 | 33 | def test_unsupported_instance_type_optimal(): 34 | with pytest.raises(NotImplementedError): 35 | get_machine_properties('optimal') 36 | 37 | def test_not_found(): 38 | with pytest.raises(KeyError): 39 | get_machine_properties('n1-nonstandard-32') 40 | -------------------------------------------------------------------------------- /tests/integration-test-for-failure-with-code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests/integration-test-for-job-failure.sh: End-to-end ElasticBLAST blast search 3 | # expecting a job failure 4 | # 5 | # Authors: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Victor Joukov (joukovv@ncbi.nlm.nih.gov) 7 | # Created: Fri 02 Jul 2020 04:01:00 PM EDT 8 | 9 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 10 | set -euo pipefail 11 | 12 | # All other settings are specified in the config file 13 | CFG=${1:-"${SCRIPT_DIR}/../share/etc/elb-aws-blastn-out-of-memory"} 14 | # Default exit code for error 15 | ERR_CODE=${2:-1} 16 | ROOT_DIR=${SCRIPT_DIR}/.. 17 | export ELB_DONT_DELETE_SETUP_JOBS=1 18 | export BLAST_USAGE_REPORT=false 19 | 20 | DRY_RUN='' 21 | #DRY_RUN=--dry-run # uncomment for debugging 22 | logfile=${3:-elb.log} 23 | rm -f $logfile 24 | 25 | cleanup_resources_on_error() { 26 | set +e 27 | time $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 28 | exit 1; 29 | } 30 | 31 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 32 | trap "cleanup_resources_on_error; /bin/rm -f $TMP" INT QUIT HUP KILL ALRM ERR 33 | 34 | rm -fr *.fa *.out.gz elb-*.log 35 | if [ ! 
-z "${ELB_TC_BRANCH+x}" ] ; then 36 | if grep -q ^labels $CFG; then 37 | sed -i~ -e "s@\(^labels.*\)@\1,branch=$ELB_TC_BRANCH@" $CFG 38 | else 39 | sed -i~ -e "/^\[cluster\]/a labels = branch=$ELB_TC_BRANCH" $CFG 40 | fi 41 | fi 42 | if [ ! -z "${ELB_TC_COMMIT_SHA+x}" ] ; then 43 | if grep -q ^labels $CFG; then 44 | sed -i~ -e "s@\(^labels.*\)@\1,commit=$ELB_TC_COMMIT_SHA@" $CFG 45 | else 46 | sed -i~ -e "/^\[cluster\]/a labels = commit=$ELB_TC_COMMIT_SHA" $CFG 47 | fi 48 | fi 49 | set +e 50 | $ROOT_DIR/elastic-blast submit --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 51 | err_code=$? 52 | 53 | if [ $err_code -eq $ERR_CODE ]; then 54 | exit_code=0 55 | else 56 | exit_code=1 57 | fi 58 | 59 | $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 60 | 61 | exit $exit_code 62 | -------------------------------------------------------------------------------- /tests/integration-test-for-job-failure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests/integration-test-for-job-failure.sh: End-to-end ElasticBLAST blast search 3 | # expecting a job failure 4 | # 5 | # Authors: Christiam Camacho (camacho@ncbi.nlm.nih.gov) 6 | # Victor Joukov (joukovv@ncbi.nlm.nih.gov) 7 | # Created: Fri 02 Jul 2020 04:01:00 PM EDT 8 | 9 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 10 | set -xeuo pipefail 11 | 12 | # All other settings are specified in the config file 13 | CFG=${1:-"${SCRIPT_DIR}/../share/etc/elb-aws-blastn-out-of-memory"} 14 | ROOT_DIR=${SCRIPT_DIR}/.. 15 | export ELB_DONT_DELETE_SETUP_JOBS=1 16 | export BLAST_USAGE_REPORT=false 17 | 18 | DRY_RUN='' 19 | #DRY_RUN=--dry-run # uncomment for debugging 20 | timeout_minutes=${2:-15} 21 | logfile=${3:-elb.log} 22 | rm -f $logfile 23 | 24 | cleanup_resources_on_error() { 25 | set +e 26 | if grep -q '^aws' $CFG; then 27 | time $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 28 | fi 29 | exit 1; 30 | } 31 | 32 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX` 33 | trap "cleanup_resources_on_error; /bin/rm -f $TMP" INT QUIT HUP KILL ALRM ERR 34 | 35 | rm -fr *.fa *.out.gz elb-*.log 36 | if [ ! -z "${ELB_TC_BRANCH+x}" ] ; then 37 | if grep -q ^labels $CFG; then 38 | sed -i~ -e "s@\(^labels.*\)@\1,branch=$ELB_TC_BRANCH@" $CFG 39 | else 40 | sed -i~ -e "/^\[cluster\]/a labels = branch=$ELB_TC_BRANCH" $CFG 41 | fi 42 | fi 43 | if [ ! -z "${ELB_TC_COMMIT_SHA+x}" ] ; then 44 | if grep -q ^labels $CFG; then 45 | sed -i~ -e "s@\(^labels.*\)@\1,commit=$ELB_TC_COMMIT_SHA@" $CFG 46 | else 47 | sed -i~ -e "/^\[cluster\]/a labels = commit=$ELB_TC_COMMIT_SHA" $CFG 48 | fi 49 | fi 50 | $ROOT_DIR/elastic-blast submit --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 51 | 52 | attempts=0 53 | [ ! -z "$DRY_RUN" ] || sleep 10 # Should be enough for the BLAST k8s jobs to get started 54 | 55 | while [ $attempts -lt $timeout_minutes ]; do 56 | exit_code=0 57 | $ROOT_DIR/elastic-blast status --verbose --exit-code --cfg $CFG $DRY_RUN || exit_code=$? 58 | 59 | # if succeeded or failed - break out of the wait cycle 60 | [ $exit_code -eq 0 ] || [ $exit_code -eq 1 ] && break 61 | [ $exit_code -ge 6 ] && break # If unknown error occurs also break out of the wait cycle 62 | 63 | attempt=$((attempts+1)) 64 | sleep 60 65 | done 66 | 67 | if [ $exit_code -eq 1 ]; then 68 | exit_code=0 69 | else 70 | exit_code=1 71 | fi 72 | 73 | if grep -q '^aws' $CFG; then 74 | if ! 
aws iam get-role --role-name ncbi-elasticblast-janitor-role >&/dev/null; then 75 | $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 76 | fi 77 | fi 78 | 79 | exit $exit_code 80 | -------------------------------------------------------------------------------- /tests/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/jobs/__init__.py -------------------------------------------------------------------------------- /tests/jobs/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/jobs/test_jobs.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
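# Illustrative sketch, not part of the original test module: the job templates and
# tests in this suite expect ${NAME} placeholders to be filled in while unknown
# ones (e.g. $SOME_UNDEFINED_VARIABLE) survive verbatim. Python's
# string.Template.safe_substitute has exactly those semantics; whether
# elastic_blast.jobs uses it internally is an assumption here.
def _example_safe_substitution() -> None:
    from string import Template

    tmpl = Template('job-${QUERY_NUM} reads $QUERY and keeps ${UNDEFINED}')
    filled = tmpl.safe_substitute(QUERY_NUM='046', QUERY='batch_046.fa')
    # Known placeholders are replaced; the unknown one is left untouched.
    assert filled == 'job-046 reads batch_046.fa and keeps ${UNDEFINED}'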
20 | 21 | # 22 | # test_jobs.py - unit test for elastic_blast.jobs module 23 | # 24 | # Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | 26 | 27 | import os 28 | from elastic_blast.jobs import read_job_template, write_job_files 29 | from tempfile import TemporaryDirectory 30 | import pytest # type: ignore 31 | 32 | 33 | @pytest.fixture 34 | def test_dir(): 35 | with TemporaryDirectory() as tempdir: 36 | yield tempdir 37 | 38 | def test_jobs(test_dir): 39 | query_path = 'gs://test-bucket' 40 | results = 'gs://results-bucket/results_path' 41 | query_num = '046' 42 | query = f'batch_{query_num}' 43 | batch_file = os.path.join(query_path, query+'.fa') 44 | template = """\ 45 | $QUERY_NUM 46 | ${QUERY} 47 | $QUERY_PATH/some_file 48 | ${RESULTS}/results.aln 49 | $SOME_UNDEFINED_VARIABLE""" 50 | map_obj = { 51 | 'RESULTS' : results 52 | } 53 | expected = f"""\ 54 | {query_num} 55 | {query} 56 | {query_path}/some_file 57 | {results}/results.aln 58 | $SOME_UNDEFINED_VARIABLE""" 59 | jobs = write_job_files(test_dir, 'job_', template, [batch_file], **map_obj) 60 | print(jobs) 61 | with open(jobs[0]) as f: 62 | job_text = f.read() 63 | assert job_text == expected 64 | 65 | 66 | def test_default_template(): 67 | job_template = read_job_template() 68 | assert type(job_template) == str 69 | assert job_template.find("${ELB_BLAST_PROGRAM}") >= 0 70 | 71 | 72 | def test_missing_template(): 73 | with pytest.raises(FileNotFoundError): 74 | read_job_template('some_wild_and_non_existing_name.template') 75 | -------------------------------------------------------------------------------- /tests/kubernetes/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/kubernetes/data/initialize_persistent_disk.ini: -------------------------------------------------------------------------------- 1 | # Sample ElasticBLAST configuration file 2 | 3 | [cloud-provider] 4 | gcp-project = my-gcp-project 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-2 10 | num-nodes = 1 11 | num-cpus = 1 12 | pd-size = 100G 13 | use-preemptible = yes 14 | 15 | [blast] 16 | program = blastp 17 | db = swissprot 18 | mem-request = 500M 19 | mem-limit = 1G 20 | queries = gs://elastic-blast-samples/queries/protein/dark-matter-1000.faa.gz 21 | results = gs://my-test-bucket 22 | 23 | [timeouts] 24 | init-pv = 1 25 | -------------------------------------------------------------------------------- /tests/kubernetes/data/job-status.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "batch/v1", 3 | "kind": "Job", 4 | "metadata": { 5 | "annotations": { 6 | "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"batch/v1\",\"kind\":\"Job\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"setup\"},\"name\":\"copy-queries-to-pd\",\"namespace\":\"default\"},\"spec\":{\"backoffLimit\":3,\"template\":{\"metadata\":{\"labels\":{\"app\":\"setup\"}},\"spec\":{\"containers\":[{\"command\":[\"gsutil\",\"-mq\",\"cp\",\"gs://camacho-test/eb239-old/query_batches/batch_*.fa\",\"/blast/queries/\"],\"image\":\"google/cloud-sdk:slim\",\"name\":\"gcp-sdk\",\"volumeMounts\":[{\"mountPath\":\"/blast/queries\",\"name\":\"queries\",\"readOnly\":false}],\"workingDir\":\"/blast/queries\"}],\"restartPolicy\":\"Never\",\"volumes\":[{\"name\":\"queries\",\"persistentVolumeClaim\":{\"claimName\":\"blast-dbs-pvc\"}}]}},\"ttlSecondsAfterFinished\":300}}\n" 7 | }, 8 | "creationTimestamp": "2020-05-07T16:20:32Z", 9 | "labels": { 10 | "app": "setup" 11 | }, 12 | "name": "copy-queries-to-pd", 13 | "namespace": "default", 14 | "resourceVersion": "4563", 15 | "selfLink": "/apis/batch/v1/namespaces/default/jobs/copy-queries-to-pd", 16 | "uid": "ac7d7219-907e-11ea-b5f1-42010a9600d2" 17 | }, 18 | "spec": { 19 | "backoffLimit": 3, 20 | "completions": 1, 21 | "parallelism": 1, 22 | "selector": { 23 | "matchLabels": { 24 | "controller-uid": "ac7d7219-907e-11ea-b5f1-42010a9600d2" 25 | } 26 | }, 27 | "template": { 28 | "metadata": { 29 | "creationTimestamp": null, 30 | "labels": { 31 | "app": "setup", 32 | "controller-uid": "ac7d7219-907e-11ea-b5f1-42010a9600d2", 33 | "job-name": "copy-queries-to-pd" 34 | } 35 | }, 36 | "spec": { 37 | "containers": [ 38 | { 39 | "command": [ 40 | "gsutil", 41 | "-mq", 42 | "cp", 43 | "gs://camacho-test/eb239-old/query_batches/batch_*.fa", 44 | "/blast/queries/" 45 | ], 46 | "image": "google/cloud-sdk:slim", 47 | "imagePullPolicy": "IfNotPresent", 48 | "name": "gcp-sdk", 49 | "resources": {}, 50 | "terminationMessagePath": "/dev/termination-log", 51 | "terminationMessagePolicy": "File", 52 | "volumeMounts": [ 53 | { 54 | "mountPath": "/blast/queries", 55 | "name": "queries" 56 | } 57 | ], 58 | "workingDir": "/blast/queries" 59 | } 60 | ], 61 | "dnsPolicy": "ClusterFirst", 62 | "restartPolicy": "Never", 63 | "schedulerName": "default-scheduler", 64 | "securityContext": {}, 65 | "terminationGracePeriodSeconds": 30, 66 | "volumes": [ 67 | { 68 | "name": "queries", 69 | "persistentVolumeClaim": { 70 | "claimName": "blast-dbs-pvc" 71 | } 72 | } 73 | ] 74 | } 75 | } 76 | }, 77 | "status": { 78 | "completionTime": "2020-05-07T16:27:12Z", 79 | "conditions": [ 80 | { 81 | "lastProbeTime": "2020-05-07T16:27:12Z", 82 | "lastTransitionTime": "2020-05-07T16:27:12Z", 83 | "status": "True", 84 | "type": "Complete" 85 | } 86 | ], 87 | "startTime": "2020-05-07T16:20:32Z", 88 | "succeeded": 1 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /tests/kubernetes/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/kubernetes 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined 5 | -------------------------------------------------------------------------------- /tests/kubernetes/test-job-init-pv.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: init-pv 5 | labels: 6 | app: setup-unit-test 7 | spec: 8 | template: 9 | 
metadata: 10 | labels: 11 | app: setup-unit-test 12 | spec: 13 | volumes: 14 | - name: blastdb 15 | persistentVolumeClaim: 16 | claimName: blast-dbs-pvc 17 | readOnly: false 18 | containers: 19 | - name: access-volume 20 | image: alpine:latest 21 | workingDir: /blast/blastdb 22 | volumeMounts: 23 | - name: blastdb 24 | mountPath: /blast/blastdb 25 | readOnly: false 26 | command: [ "ls", "/blast/blastdb" ] 27 | restartPolicy: Never 28 | backoffLimit: 3 29 | -------------------------------------------------------------------------------- /tests/kubernetes/test-pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: blast-dbs-pvc 5 | spec: 6 | storageClassName: "gcp-pd-ssd" 7 | accessModes: 8 | - ReadOnlyMany 9 | - ReadWriteOnce 10 | resources: 11 | requests: 12 | storage: 10G 13 | -------------------------------------------------------------------------------- /tests/kubernetes/test-storage-gcp.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: gcp-pd-ssd 5 | provisioner: kubernetes.io/gce-pd 6 | parameters: 7 | type: pd-ssd 8 | fsType: ext4 9 | replication-type: none 10 | reclaimPolicy: Delete 11 | allowVolumeExpansion: true 12 | volumeBindingMode: Immediate 13 | -------------------------------------------------------------------------------- /tests/resources/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/resources/quotas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/resources/quotas/__init__.py -------------------------------------------------------------------------------- /tests/resources/quotas/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/resources/quotas/pytest.ini -------------------------------------------------------------------------------- /tests/run-summary/data/run_summary_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "clusterInfo": { 4 | "provider": "AWS", 5 | "numMachines": 1, 6 | "name": "elasticblast-joukovv-d484acbac", 7 | "numVCPUsPerMachine": 32, 8 | "RamPerMachine": 131072, 9 | "machineType": "m5.8xlarge" 10 | }, 11 | "runtime": { 12 | "wallClock": 36.228, 13 | "blastDbSetup": { 14 | "num": 1, 15 | "totalTime": 1.76, 16 | "minTime": 1.76, 17 | "maxTime": 1.76 18 | }, 19 | "blast": { 20 | "num": 11, 21 | "totalTime": 9.738, 22 | "minTime": 0.645, 23 | "maxTime": 0.932 24 | }, 25 | "queryDownload": { 26 | "num": 11, 27 | "totalTime": 9.05, 28 | "minTime": 0.77, 29 | "maxTime": 0.904 30 | } 31 | }, 32 | "blastData": { 33 | "queryLength": 53353969, 34 | "databaseNumSeq": 37674, 35 | "databaseLength": 7795302 36 | }, 37 | "lettersPerSecondPerCpu": 46023, 38 | "numJobs": 11, 39 | "numJobsFailed": 0, 40 | "exitCode": 0 41 | } 42 | -------------------------------------------------------------------------------- /tests/run-summary/data/run_summary_sample_failed.json: -------------------------------------------------------------------------------- 1 
| { 2 | "version": "1.0", 3 | "clusterInfo": { 4 | "provider": "AWS", 5 | "numMachines": 4, 6 | "numVCPUsPerMachine": 32, 7 | "RamPerMachine": 131072, 8 | "machineType": "m5.8xlarge" 9 | }, 10 | "runtime": { 11 | "wallClock": 1150.194, 12 | "blastDbSetup": { 13 | "num": 1, 14 | "totalTime": 1144.772, 15 | "minTime": 1144.772, 16 | "maxTime": 1144.772 17 | }, 18 | "queryDownload": { 19 | "num": 1, 20 | "totalTime": 0.962, 21 | "minTime": 0.962, 22 | "maxTime": 0.962 23 | }, 24 | "querySplit": { 25 | "num": 1, 26 | "totalTime": 1.971, 27 | "minTime": 1.971, 28 | "maxTime": 1.971 29 | } 30 | }, 31 | "blastData": { 32 | "queryLength": 49085274, 33 | "databaseNumSeq": 391669761, 34 | "databaseLength": 142106433671 35 | }, 36 | "lettersPerSecondPerCpu": 333, 37 | "numJobs": 2, 38 | "numJobsFailed": 1, 39 | "exitCode": 1 40 | } 41 | -------------------------------------------------------------------------------- /tests/run-summary/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/aws 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/run-summary/test_run_summary.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
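# The two sample run summaries above are consistent with lettersPerSecondPerCpu
# being roughly queryLength / (wallClock * numMachines * numVCPUsPerMachine).
# That relationship is inferred from the sample data, not from the run-summary
# implementation, so treat the sketch below as an illustration only.
def _example_letters_per_second_per_cpu() -> None:
    # Values copied from run_summary_sample.json above.
    query_length = 53353969
    wall_clock = 36.228
    num_machines = 1
    vcpus_per_machine = 32
    rate = query_length / (wall_clock * num_machines * vcpus_per_machine)
    assert round(rate) == 46023  # the sample's lettersPerSecondPerCpu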
20 | 21 | """ 22 | Unit tests for run summary command 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | import os 28 | import subprocess 29 | import pytest 30 | 31 | TEST_DIR = os.path.join(os.path.dirname(__file__), 'data') 32 | TEST_LOGS = 'aws-output-sample-aggregate.log' 33 | TEST_SUMMARY = 'run_summary_sample.json' 34 | TEST_FAILED_LOGS = 'aws-output-sample-failed-aggregate.log' 35 | TEST_FAILED_SUMMARY = 'run_summary_sample_failed.json' 36 | TEST_CASES = [ 37 | (TEST_LOGS, TEST_SUMMARY), 38 | (TEST_FAILED_LOGS, TEST_FAILED_SUMMARY) 39 | ] 40 | 41 | @pytest.mark.skipif(os.getenv('TEAMCITY_VERSION') is not None, reason='AWS credentials not set in TC') 42 | def test_from_logs(): 43 | for logs, summary in TEST_CASES: 44 | proc = subprocess.run([ 45 | 'elastic-blast', 'run-summary', '--read-logs', os.path.join(TEST_DIR, logs) 46 | ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 47 | assert proc.stderr.decode() == '' 48 | output = proc.stdout.decode() 49 | with open(os.path.join(TEST_DIR, summary)) as f: 50 | sample = f.read() 51 | assert output == sample 52 | assert proc.returncode == 0 53 | -------------------------------------------------------------------------------- /tests/split/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/split/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/split/test_split.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
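# Simplified illustration, not the real elastic_blast.split.FASTAReader: query
# splitting groups input into batches of roughly batch_len letters and starts a
# new batch only at a '>' header line, so the test below can expect two input
# streams to end up concatenated in a single batch when batch_len exceeds their
# combined length.
def _naive_fasta_batches(lines, batch_len):
    """Group FASTA lines into batches, cutting only at sequence boundaries
    once the current batch holds at least batch_len sequence letters."""
    batches, current, letters = [], [], 0
    for line in lines:
        if line.startswith('>') and letters >= batch_len:
            batches.append(''.join(current))
            current, letters = [], 0
        current.append(line)
        if not line.startswith('>'):
            letters += len(line.strip())
    if current:
        batches.append(''.join(current))
    return batches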
20 | 21 | """ 22 | Unit tests for split module 23 | 24 | """ 25 | 26 | import os 27 | from io import StringIO 28 | import tempfile 29 | import shutil 30 | import hashlib 31 | from elastic_blast import split 32 | import pytest 33 | 34 | 35 | @pytest.fixture 36 | def tmpdir(): 37 | """Fixture that creates a temporary directory and deletes it after a test""" 38 | name = tempfile.mkdtemp() 39 | yield name 40 | shutil.rmtree(name) 41 | 42 | 43 | def test_FASTAReader_multi_file(tmpdir): 44 | """Test FASTAReader with multiple files, ensure continuity in a batch.""" 45 | fasta1 =""">seq1 46 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 47 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 48 | >seq2 49 | TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT 50 | GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG""" 51 | 52 | fasta2 = """>some_id 53 | AACTCTCTCTCTCTCTCTCTCTTCTCTTCTCTCTCTCTCTCTCTCTCTC 54 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA""" 55 | 56 | 57 | # run FASTA reader on two input streams with batch size larger than 58 | # sum of both strings 59 | with StringIO(fasta1) as f1, StringIO(fasta2) as f2: 60 | reader = split.FASTAReader([f1, f2], len(fasta1) + len(fasta2) + 1, 61 | tmpdir) 62 | reader.read_and_cut() 63 | assert len(reader.queries) == 1 64 | 65 | # read resulting batch 66 | with open(os.path.join(tmpdir, 'batch_000.fa')) as f: 67 | batch = f.readlines() 68 | 69 | # check that batch has the same content as fasta1 and fasta2 combined 70 | assert hashlib.sha256('\n'.join([fasta1, fasta2, '']).encode()).hexdigest() == \ 71 | hashlib.sha256(''.join(batch).encode()).hexdigest() 72 | 73 | -------------------------------------------------------------------------------- /tests/status/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/status/__init__.py -------------------------------------------------------------------------------- /tests/status/data/status-test.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastn with taxonomic filtering 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 30 12 | pd-size = 1000G 13 | use-preemptible = 1 14 | 15 | [blast] 16 | program = blastn 17 | options = -outfmt "6 std staxids" -negative_taxids 1866885,90964 18 | db = pdbnt 19 | mem-request = 92G 20 | mem-limit = 100G 21 | queries = gs://elastic-blast-samples/queries/WB4_2_0811/RFQT01.1.fsa_nt.gz 22 | results = gs://dummy-bucket 23 | -------------------------------------------------------------------------------- /tests/status/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/status/test_status.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 
5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 20 | 21 | """ 22 | test_status.py - unit test for ElasticBlastGcp check_status method 23 | 24 | Author: Victor Joukov joukovv@ncbi.nlm.nih.gov 25 | """ 26 | 27 | import os 28 | from argparse import Namespace 29 | from elastic_blast.config import configure 30 | from elastic_blast.elb_config import ElasticBlastConfig 31 | 32 | from elastic_blast.gcp import ElasticBlastGcp 33 | from elastic_blast.constants import ElbCommand, ElbStatus 34 | from tests.utils import gke_mock 35 | 36 | DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') 37 | INI = os.path.join(DATA_DIR, 'status-test.ini') 38 | 39 | def test_status(gke_mock): 40 | "Using mock kubectl run our actual test" 41 | args = Namespace(cfg=INI) 42 | cfg = ElasticBlastConfig(configure(args), task = ElbCommand.STATUS) 43 | elastic_blast = ElasticBlastGcp(cfg) 44 | status, counters, _ = elastic_blast.check_status() 45 | assert status == ElbStatus.FAILURE 46 | assert counters == {'failed': 1, 'succeeded': 1, 'pending': 1, 'running': 1} 47 | -------------------------------------------------------------------------------- /tests/submit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/submit/__init__.py -------------------------------------------------------------------------------- /tests/submit/data/blastdb-notfound.ini: -------------------------------------------------------------------------------- 1 | [cloud-provider] 2 | gcp-project = ncbi-sandbox-blast 3 | gcp-region = us-east4 4 | gcp-zone = us-east4-b 5 | 6 | [cluster] 7 | name = pytest-elastic-blast-no-blastdb 8 | machine-type = n1-standard-32 9 | num-cpus = 30 10 | num-nodes = 10 11 | pd-size = 3000G 12 | 13 | [blast] 14 | results = gs://elasticblast-tomcat/pytest/submit/blastdb-notfound 15 | program = blastx 16 | db = some-non-exsitent-database 17 | batch-len = 10000 18 | options = -task blastx-fast 19 | queries = gs://elastic-blast-samples/queries/small/e7ebd4c9-d8a3-405c-8180-23b85f1709a7.fa 20 | -------------------------------------------------------------------------------- /tests/submit/data/elb-blastn-neg-taxidfiltering.ini: -------------------------------------------------------------------------------- 1 | # ElasticBLAST configuration file to run blastn with taxonomic filtering 2 | 3 | [cloud-provider] 4 | gcp-project = ncbi-sandbox-blast 5 | gcp-region = us-east4 6 | gcp-zone = us-east4-b 7 | 8 | [cluster] 9 | machine-type = n1-standard-32 10 | num-nodes = 1 11 | num-cpus = 30 12 | pd-size = 3000G 13 | use-preemptible = 
1 14 | 15 | [blast] 16 | program = blastn 17 | options = -outfmt "6 std staxids" -negative_taxids 1866885,90964 18 | db = testdb 19 | mem-request = 92G 20 | mem-limit = 100G 21 | queries = gs://elastic-blast-samples/queries/WB4_2_0811/RFQT01.1.fsa_nt.gz 22 | results = gs://dummy-bucket 23 | 24 | -------------------------------------------------------------------------------- /tests/submit/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/submit 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/subst/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/subst/__init__.py -------------------------------------------------------------------------------- /tests/subst/pytest.ini: -------------------------------------------------------------------------------- 1 | # This file is here to provide selective pytest in presence of tox.ini at the root 2 | # It allows run only this test suite as: 3 | # pytest tests/fasta_split 4 | # See https://docs.pytest.org/en/latest/customize.html for description how test root is determined -------------------------------------------------------------------------------- /tests/subst/test_subst.py: -------------------------------------------------------------------------------- 1 | # PUBLIC DOMAIN NOTICE 2 | # National Center for Biotechnology Information 3 | # 4 | # This software is a "United States Government Work" under the 5 | # terms of the United States Copyright Act. It was written as part of 6 | # the authors' official duties as United States Government employees and 7 | # thus cannot be copyrighted. This software is freely available 8 | # to the public for use. The National Library of Medicine and the U.S. 9 | # Government have not placed any restriction on its use or reproduction. 10 | # 11 | # Although all reasonable efforts have been taken to ensure the accuracy 12 | # and reliability of the software and data, the NLM and the U.S. 13 | # Government do not and cannot warrant the performance or results that 14 | # may be obtained by using this software or data. The NLM and the U.S. 15 | # Government disclaim all warranties, express or implied, including 16 | # warranties of performance, merchantability or fitness for any particular 17 | # purpose. 18 | # 19 | # Please cite NCBI in any work or product based on this material. 
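# The `options` values in the .ini files above, e.g.
# -outfmt "6 std staxids" -negative_taxids 1866885,90964, mix quoted and unquoted
# arguments, so they need shell-style splitting rather than a plain str.split().
# A minimal sketch with shlex; whether ElasticBLAST parses options this way
# internally is an assumption.
def _example_split_blast_options() -> None:
    import shlex

    options = '-outfmt "6 std staxids" -negative_taxids 1866885,90964'
    tokens = shlex.split(options)
    # The quoted output format specification stays together as one argument.
    assert tokens == ['-outfmt', '6 std staxids', '-negative_taxids', '1866885,90964']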
--------------------------------------------------------------------------------
/tests/taxonomy/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/taxonomy/pytest.ini:
--------------------------------------------------------------------------------
1 | # This file is here to enable selective pytest runs in the presence of the tox.ini at the root
2 | # It allows running only this test suite as:
3 | # pytest tests/taxonomy
4 | # See https://docs.pytest.org/en/latest/customize.html for a description of how the test root is determined
--------------------------------------------------------------------------------
/tests/tc-bash-runner.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # tc-bash-runner.sh: Facilitates TC reporting of bash scripts
3 | #
4 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov)
5 | # Created: Wed 06 May 2020 11:45:02 AM EDT
6 |
7 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
8 | test_suite_name='ElasticBLAST-application-level-tests'
9 | echo "##teamcity[testSuiteStarted name='$test_suite_name'] "
10 |
11 | # Default is to run all scripts WITHOUT parameters
12 | test_scripts=$SCRIPT_DIR/*.sh
13 | test_label=
14 | if [ $# -ne 0 ] ; then
15 |     test_scripts=$1
16 |     shift
17 |     # If the next argument ends with "ini", treat it as the ini file,
18 |     # otherwise treat it as an additional test label for TC statistics collection
19 |     if [[ $1 != *ini ]] ; then
20 |         test_label="-$1"
21 |         shift
22 |     fi
23 |     script_arguments=$*
24 | fi
25 |
26 | for t in $test_scripts; do
27 |     [ $(basename $t) == $(basename ${BASH_SOURCE[0]}) ] && continue
28 |     name=$(basename $t)
29 |     echo "##teamcity[testStarted name='$name' captureStandardOutput='true'] "
30 |
31 |     # https://stackoverflow.com/questions/20514112/git-short-branch-name-in-teamcity
32 |     export ELB_TC_BRANCH=$(git symbolic-ref -q --short HEAD | tr '[A-Z]./' '[a-z]-' | cut -c-63)
33 |     if [ -z ${ELB_TC_BRANCH} ]; then
34 |         export ELB_TC_BRANCH=$(git describe --tags | tr '[A-Z]./' '[a-z]-' | cut -c-63)
35 |     fi
36 |     # https://www.jetbrains.com/help/teamcity/service-messages.html#Adding+or+Changing+a+Build+Parameter
37 |     echo "##teamcity[setParameter name='env.ELB_TC_BRANCH' value='$ELB_TC_BRANCH']"
38 |
39 |     export ELB_TC_COMMIT_SHA=$(git log -1 --pretty=format:%h | tr '[A-Z]./' '[a-z]-' | cut -c-63 | tr -d '\n')
40 |     echo "##teamcity[setParameter name='env.ELB_TC_COMMIT_SHA' value='$ELB_TC_COMMIT_SHA']"
41 |
42 |     $t $script_arguments || echo "##teamcity[testFailed name='$name'] "
43 |     #awk -f $SCRIPT_DIR/parse-runtimes.awk elb.log | sed "s,\",\',g"
44 |     awk '/ RUNTIME / {printf "##teamcity[buildStatisticValue key=\"%s\" value=\"%f\"]\n", $(NF-2), $(NF-1)}' elb.log | sed "s,\",\',g"
45 |     echo "##teamcity[testFinished name='$name'] "
46 |
47 |     if ! [ -z $test_label ] ; then
48 |         cp elb.log ${test_label##-}.log
49 |     fi
50 | done
51 |
52 | echo "##teamcity[testSuiteFinished name='$test_suite_name'] "
53 |
--------------------------------------------------------------------------------
/tests/test-resubmission-to-same-results-bucket.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # tests/test-resubmission-to-same-results-bucket.sh: Perform an end-to-end
3 | # ElasticBLAST search, interleaved with a second submission that uses the
4 | # same results bucket to elicit an error message.
5 | #
6 | # Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov)
7 | # Created: Wed 06 May 2020 06:59:03 AM EDT
8 |
9 | SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
10 | set -euo pipefail
11 |
12 | # All other settings are specified in the config file
13 | CFG=${1:-"${SCRIPT_DIR}/../share/etc/elb-blastn-pdbnt.ini"}
14 | ROOT_DIR=${SCRIPT_DIR}/..
15 | export ELB_DONT_DELETE_SETUP_JOBS=1
16 | export BLAST_USAGE_REPORT=false
17 |
18 | [ -f $CFG ] || { echo "ElasticBLAST configuration file $CFG doesn't exist"; exit 1; }
19 |
20 | [ ! -z "${ELB_RESULTS}" ] || { echo "ELB_RESULTS environment variable must be defined"; exit 1; }
21 |
22 | DRY_RUN=''
23 | #DRY_RUN=--dry-run  # uncomment for debugging
24 | timeout_minutes=${2:-5}
25 |
26 | logfile=${3:-elb.log}
27 | rm -f $logfile
28 |
29 | errmsgfile=err.msg
30 | rm -f $errmsgfile
31 |
32 | cleanup_resources_on_error() {
33 |     set +e
34 |     echo Cleanup on error
35 |     if grep -q '^aws-' $CFG; then
36 |         $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN
37 |     fi
38 |     exit 1;
39 | }
40 |
41 | TMP=`mktemp -t $(basename -s .sh $0)-XXXXXXX`
42 | trap "cleanup_resources_on_error; /bin/rm -f $TMP" INT QUIT HUP KILL ALRM ERR
43 | if [ ! -z "${ELB_TC_BRANCH+x}" ] ; then
44 |     if grep -q ^labels $CFG; then
45 |         sed -i~ -e "s@\(^labels.*\)@\1,branch=$ELB_TC_BRANCH@" $CFG
46 |     else
47 |         sed -i~ -e "/^\[cluster\]/a labels = branch=$ELB_TC_BRANCH" $CFG
48 |     fi
49 | fi
50 | if [ ! -z "${ELB_TC_COMMIT_SHA+x}" ] ; then
51 |     if grep -q ^labels $CFG; then
52 |         sed -i~ -e "s@\(^labels.*\)@\1,commit=$ELB_TC_COMMIT_SHA@" $CFG
53 |     else
54 |         sed -i~ -e "/^\[cluster\]/a labels = commit=$ELB_TC_COMMIT_SHA" $CFG
55 |     fi
56 | fi
57 |
58 | echo Submit first time
59 | $ROOT_DIR/elastic-blast submit --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN
60 | sleep 5
61 | echo Submit second time
62 | # This should fail; grab the error message to check later
63 | $ROOT_DIR/elastic-blast submit --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN 2>$errmsgfile || true
64 |
65 | attempts=0
66 | [ ! -z "$DRY_RUN" ] || sleep 10  # Should be enough for the BLAST k8s jobs to get started
67 |
68 | echo Check status
69 | while [ $attempts -lt $timeout_minutes ]; do
70 |     $ROOT_DIR/elastic-blast status --verbose --cfg $CFG $DRY_RUN | tee $TMP
71 |     #set +e
72 |     if grep '^Pending 0' $TMP && grep '^Running 0' $TMP; then
73 |         break
74 |     fi
75 |     attempts=$((attempts+1))
76 |     sleep 60
77 |     #set -e
78 | done
79 |
80 | # Clean up
81 | if grep -q '^aws-' $CFG; then
82 |     if ! aws iam get-role --role-name ncbi-elasticblast-janitor-role >&/dev/null; then
83 |         $ROOT_DIR/elastic-blast delete --cfg $CFG --loglevel DEBUG --logfile $logfile $DRY_RUN
84 |     fi
85 | fi
86 |
87 | # Do the final error check: this string must be in the logfile
88 | echo Check the error message
89 | grep 'Please resubmit your search with a different value' $logfile || {
90 |     echo "Missing expected error message in log file" ;
91 |     exit 1;
92 | }
93 |
--------------------------------------------------------------------------------
/tests/tuner/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/tuner/pytest.ini:
--------------------------------------------------------------------------------
1 | # This file is here to enable selective pytest runs in the presence of the tox.ini at the root
2 | # It allows running only this test suite as:
3 | # pytest tests/tuner
4 | # See https://docs.pytest.org/en/latest/customize.html for a description of how the test root is determined
--------------------------------------------------------------------------------
/tests/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/elastic-blast/db71efde1acd499196c0c67c42ca4730476bc596/tests/util/__init__.py
--------------------------------------------------------------------------------
/tests/util/pytest.ini:
--------------------------------------------------------------------------------
1 | # This file is here to enable selective pytest runs in the presence of the tox.ini at the root
2 | # It allows running only this test suite as:
3 | # pytest tests/util
4 | # See https://docs.pytest.org/en/latest/customize.html for a description of how the test root is determined
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | ; put the list of your test environments here:
3 | envlist = py39, py311
4 |
5 | ; this parameter should be used if your project
6 | ; doesn't have a setup.py file: http://stackoverflow.com/questions/18962403/how-do-i-run-tox-in-a-project-that-has-no-setup-py
7 | skipsdist = False
8 |
9 | [testenv]
10 | ; this line is needed for TC so it can parse tests from the build output :/
11 | ; tox does not copy the environment, except for the variables below
12 | passenv = TEAMCITY_VERSION \
13 |           RUN_ALL_TESTS \
14 |           USER \
15 |           BOTO_CONFIG \
16 |           AWS_ACCESS_KEY_ID \
17 |           AWS_SECRET_ACCESS_KEY \
18 |           AWS_SECURITY_TOKEN \
19 |           AWS_SESSION_TOKEN \
20 |           AWS_DEFAULT_REGION \
21 |           BLAST_USAGE_REPORT \
22 |           CLOUDSDK_CONFIG \
23 |           CLOUDSDK_CORE_PROJECT \
24 |           CLOUDSDK_COMPUTE_ZONE
25 |
26 | ; provide the path to your requirements file here:
27 | deps =
28 |     -rrequirements/test.txt
29 |
30 | commands =
31 |     pip install .
32 |     py.test
33 |
34 | [pytest]
35 | ; put here your tests folder and module(s) to test
36 | ; for example: addopts = tests/ --cov my_module1 --cov my_module2 --cov-report term --cov-report html
37 | ; for more information see: https://pypi.python.org/pypi/pytest-cov
38 | addopts = tests/ --cov=elastic_blast --cov-report term --cov-report html -x
--------------------------------------------------------------------------------
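The [pytest] section above makes a plain pytest (or tox) invocation run the whole tests/ tree with coverage for the elastic_blast package and stop at the first failure (-x). As an editorial illustration only, the snippet below drives the same selection programmatically through pytest.main; it assumes pytest and pytest-cov are installed, as they are in the tox test environment via requirements/test.txt.

# Editorial sketch -- runs the same test selection that the addopts line above
# configures, but from Python. Requires pytest and pytest-cov (both pulled in
# by requirements/test.txt in the tox environment).
import sys
import pytest

exit_code = pytest.main([
    'tests/',
    '--cov=elastic_blast',
    '--cov-report', 'term',
    '--cov-report', 'html',
    '-x',  # stop at the first failure, mirroring addopts
])
sys.exit(exit_code)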