├── .cfnlintrc.yaml ├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── gh-pages.yml │ ├── stale.yml │ └── test.yml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _scripts ├── configure-deploy.sh ├── deploy.sh ├── make-dist.sh └── test.sh ├── docs ├── containers │ ├── container-examples.md │ └── container-introduction.md ├── core-env │ ├── build-custom-distribution.md │ ├── create-custom-compute-resources.md │ ├── create-iam-roles.md │ ├── create-s3-bucket.md │ ├── custom-deploy.md │ ├── images │ │ ├── aws-genomics-workflows-high-level-arch.png │ │ └── ebs-autoscale.png │ ├── introduction.md │ └── setup-aws-batch.md ├── disclaimer.md ├── extra.css ├── images │ ├── AWS_logo_RGB.svg │ ├── AWS_logo_RGB_REV.svg │ ├── AWS_logo_RGB_WHT.svg │ ├── aws-genomics-workflows-banner.png │ ├── cloudformation-launch-stack.png │ ├── custom-deploy-0.png │ ├── custom-deploy-1.png │ ├── genomics-workflow.png │ ├── nextflow-0.png │ ├── root-vpc-1.png │ ├── root-vpc-2.png │ ├── root-vpc-3.png │ ├── root-vpc-4.png │ └── root-vpc-5.png ├── index.md ├── install-cromwell │ ├── images │ │ ├── screen1.png │ │ ├── screen2.png │ │ ├── screen3.png │ │ ├── screen4.png │ │ └── screen5.png │ └── index.md ├── orchestration │ ├── cost-effective-workflows │ │ ├── cost-effective-workflows.md │ │ └── images │ │ │ ├── ClusterDashboard.png │ │ │ ├── ScreenShot1.png │ │ │ ├── ScreenShot1a.png │ │ │ ├── ScreenShot2.png │ │ │ ├── ScreenShot3.png │ │ │ └── TaskDashboard.png │ ├── cromwell │ │ ├── cromwell-examples.md │ │ ├── cromwell-overview.md │ │ ├── cromwell-trouble-shooting.md │ │ └── images │ │ │ └── cromwell-on-aws_infrastructure.png │ ├── nextflow │ │ ├── images │ │ │ ├── nextflow-on-aws-infrastructure.png │ │ │ └── nextflow-on-aws-infrastructure.xml │ │ ├── nextflow-overview.md │ │ └── nextflow-trouble-shooting.md │ ├── orchestration-intro.md │ └── step-functions │ │ ├── files │ │ └── example-state-machine.json │ │ ├── images │ │ ├── aws-sfn-genomics-workflow-arch.png │ │ ├── cfn-stack-outputs-statemachineinput.png │ │ ├── cfn-stack-outputs-tab.png │ │ ├── example-state-machine.png │ │ ├── sfn-batch-job-snippet.png │ │ ├── sfn-console-execution-inprogress.png │ │ ├── sfn-console-start-execution-dialog.png │ │ ├── sfn-console-start-execution.png │ │ ├── sfn-console-statemachine.png │ │ ├── sfn-example-mapping-state-machine.png │ │ └── step-functions-structures.png │ │ ├── step-functions-examples.md │ │ └── step-functions-overview.md └── quick-start.md ├── environment.yaml ├── main.py ├── mkdocs.yml ├── requirements.txt └── src ├── .gitignore ├── aws-genomics-cdk ├── .gitignore ├── .npmignore ├── README.md ├── app.config.json ├── assets │ ├── genomics-policy-s3.json │ └── launch_template_user_data.txt ├── bin │ └── aws-genomics-cdk.ts ├── cdk.json ├── containers │ ├── README.md │ ├── build.sh │ ├── bwa │ │ └── Dockerfile │ ├── entry.dockerfile │ ├── entrypoint.sh │ ├── fastqc │ │ └── Dockerfile │ ├── gatk │ │ └── Dockerfile │ ├── minimap2 │ │ └── Dockerfile │ ├── picard │ │ └── Dockerfile │ └── samtools │ │ └── Dockerfile ├── examples │ ├── README.md │ ├── batch-bwa-job.json │ ├── batch-fastqc-job.json │ ├── batch-gatk-dictionary.json │ ├── batch-gatk-htc.json │ ├── batch-minimap2-job.json │ ├── batch-picard-add-missing-groups.json │ ├── batch-samtools-index.json │ └── batch-samtools-sort.json ├── jest.config.js ├── lib │ ├── aws-genomics-cdk-stack.ts │ ├── batch │ │ ├── batch-compute-environmnet-construct.ts │ │ ├── batch-iam-stack.ts │ │ ├── batch-stack.ts │ │ 
├── job-queue-construct.ts │ │ └── launch-template-construct.ts │ ├── vpc │ │ └── vpc-stack.ts │ └── workflows │ │ ├── genomics-task-construct.ts │ │ ├── job-definition-construct.ts │ │ ├── job-definitions.ts │ │ ├── variant-calling-stack.ts │ │ └── workflow-config.ts ├── package-lock.json ├── package.json ├── test │ └── aws-genomics-cdk.test.ts └── tsconfig.json ├── containers ├── .gitignore ├── _common │ ├── README.md │ ├── aws.dockerfile │ ├── build.sh │ ├── entrypoint.aws.sh │ └── push.sh ├── bcftools │ └── Dockerfile ├── buildspec-nextflow.yml ├── buildspec-workflow-tool.yml ├── bwa │ └── Dockerfile ├── nextflow │ ├── Dockerfile │ └── nextflow.aws.sh └── samtools │ └── Dockerfile ├── ebs-autoscale ├── README.md ├── bin │ └── init-ebs-autoscale.sh └── get-amazon-ebs-autoscale.sh ├── ecs-additions ├── awscli-shim.sh ├── ecs-additions-common.sh ├── ecs-additions-cromwell.sh ├── ecs-additions-nextflow.sh ├── ecs-additions-step-functions.sh ├── ecs-logs-collector.sh ├── fetch_and_run.sh └── provision.sh ├── gwf-core-codepipeline ├── .gitignore ├── .npmignore ├── README.md ├── bin │ └── aws-genomics-workflow-code-build.ts ├── cdk.json ├── jest.config.js ├── lib │ └── aws-genomics-workflow-code-build-stack.ts ├── package-lock.json ├── package.json └── tsconfig.json ├── lambda ├── codebuild │ ├── lambda.py │ └── requirements.txt └── ecr │ ├── lambda.py │ └── requirements.txt ├── scripts └── nf-aws.py └── templates ├── README.md ├── _common └── container-build.template.yaml ├── cromwell ├── cromwell-and-core.template.yaml └── cromwell-resources.template.yaml ├── gwfcore ├── gwfcore-batch.template.yaml ├── gwfcore-code.template.yaml ├── gwfcore-efs.template.yaml ├── gwfcore-fsx.template.yaml ├── gwfcore-iam.template.yaml ├── gwfcore-launch-template.template.yaml ├── gwfcore-root.template.yaml └── gwfcore-s3.template.yaml ├── nextflow ├── nextflow-and-core.template.yaml └── nextflow-resources.template.yaml └── step-functions ├── sfn-resources-state-machine.template.yaml └── sfn-resources.template.yaml /.cfnlintrc.yaml: -------------------------------------------------------------------------------- 1 | ignore_checks: 2 | - W3 -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
7 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Build docs and publish to github pages 2 | 3 | name: github pages 4 | 5 | on: 6 | workflow_dispatch: # Allow manual triggering of the action 7 | release: 8 | types: [published] 9 | 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-20.04 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | steps: 17 | - uses: actions/checkout@v2 18 | with: 19 | ref: 'master' # TODO: revert this to 'release' 20 | 21 | - name: Setup Python 22 | uses: actions/setup-python@v3.1.2 23 | with: 24 | python-version: 3.9 25 | 26 | - name: Install Python dependencies 27 | run: pip install -r requirements.txt 28 | 29 | # - name: Clean Docs 30 | # run: make clean-docs 31 | 32 | - name: Generate Docs 33 | run: mkdocs build 34 | 35 | - name: Deploy To Pages 36 | uses: peaceiris/actions-gh-pages@v3 # see https://github.com/peaceiris/actions-gh-pages for details 37 | with: 38 | github_token: ${{ secrets.GITHUB_TOKEN }} 39 | publish_branch: gh-pages # set to 'gh-pages to publish to github pages' 40 | publish_dir: ./site # Deploy the contents of ./docs to github pages -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '19 9 * * *' 11 | 12 | jobs: 13 | stale: 14 | 15 | runs-on: ubuntu-latest 16 | permissions: 17 | issues: write 18 | pull-requests: write 19 | 20 | steps: 21 | - uses: actions/stale@v5 22 | with: 23 | # Setting messages to an empty string will cause the automation to skip 24 | # that category 25 | ancient-issue-message: Greetings! Sorry to say but this is a very old issue that is probably not getting as much attention as it deserves. We encourage you to check if this is still an issue in the latest release and if you find that this is still a problem, please feel free to open a new one. 26 | stale-issue-message: Greetings! It looks like this issue hasn’t had any activity for over three months. We encourage you to check if this is still an issue in the latest release. Because it has been longer than three months since the last update on this, and in the absence of more information, we will be closing this issue soon. If you find that this is still a problem, please feel free to provide a comment or add an upvote to prevent automatic closure, or if the issue is already closed, please feel free to open a new one. 27 | stale-pr-message: Greetings! It looks like this PR hasn’t had any activity for over three months. Add a comment or an upvote to prevent automatic closure, or if the issue is already closed, please feel free to open a new one. 
28 | 29 | # These labels are required 30 | stale-issue-label: closing-soon 31 | exempt-issue-label: automation-exempt 32 | stale-pr-label: closing-soon 33 | exempt-pr-label: needs-review 34 | response-requested-label: response-requested 35 | 36 | # Don't set closed-for-staleness label to skip closing very old issues 37 | # regardless of label 38 | closed-for-staleness-label: closed-for-staleness 39 | 40 | # Issue timing 41 | days-before-stale: 1 42 | days-before-close: 1 43 | days-before-ancient: 90 44 | 45 | # If you don't want to mark a issue as being ancient based on a 46 | # threshold of "upvotes", you can set this here. An "upvote" is 47 | # the total number of +1, heart, hooray, and rocket reactions 48 | # on an issue. 49 | minimum-upvotes-to-exempt: 1 50 | 51 | repo-token: ${{ secrets.GITHUB_TOKEN }} 52 | loglevel: DEBUG 53 | # Set dry-run to true to not perform label or close actions. 54 | dry-run: false 55 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the master branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | test: 19 | runs-on: ubuntu-latest 20 | steps: 21 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 22 | - uses: actions/checkout@v3 23 | 24 | - name: Setup Python 25 | uses: actions/setup-python@v3.1.2 26 | with: 27 | python-version: 3.9 28 | 29 | - name: Get pip cache dir 30 | id: pip-cache 31 | run: | 32 | echo "::set-output name=dir::$(pip cache dir)" 33 | 34 | - name: pip cache 35 | uses: actions/cache@v3 36 | with: 37 | path: ${{ steps.pip-cache.outputs.dir }} 38 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 39 | restore-keys: | 40 | ${{ runner.os }}-pip- 41 | 42 | - name: Install Python dependencies 43 | run: pip install -r requirements.txt 44 | 45 | - name: Test 46 | run: bash _scripts/test.sh 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | /.idea/markdown-navigator.xml 3 | /.idea/markdown-navigator/profiles_settings.xml 4 | /.idea/misc.xml 5 | /.idea/modules.xml 6 | /.idea/vcs.xml 7 | /.idea/workspace.xml 8 | 9 | #********** osx template********** 10 | 11 | .DS_Store 12 | 13 | # Thumbnails 14 | ._* 15 | 16 | # Files that might appear on external disk 17 | .Spotlight-V100 18 | .Trashes 19 | 20 | 21 | #********** windows template********** 22 | 23 | # Windows image file caches 24 | Thumbs.db 25 | 26 | # Folder config file 27 | Desktop.ini 28 | 29 | # Recycle Bin used on file shares 30 | $RECYCLE.BIN/ 31 | 32 | 33 | #********** emacs template********** 34 | 35 | *~ 36 | \#*\# 37 | /.emacs.desktop 38 | /.emacs.desktop.lock 39 | .elc 40 | auto-save-list 41 | tramp 42 | .\#* 43 | 44 | # Org-mode 45 | .org-id-locations 46 | *_archive 47 | 48 | #********** repo specific ignores ********** 49 | 50 | tmp/ 51 | site/ 52 | artifacts/ 53 | *pem 54 | *tar.gz 55 | Makefile 56 | 
__pycache__ 57 | publish 58 | launch.sh 59 | LICENSE-* 60 | src/templates/tests 61 | /aws-genomics-workflows.iml 62 | _ignore 63 | dist/ 64 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | 4 | - "3.6" 5 | 6 | before_install: 7 | 8 | - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 9 | - chmod +x miniconda.sh 10 | - ./miniconda.sh -b -f 11 | - export PATH=$HOME/miniconda3/bin:$PATH 12 | - which conda 13 | - conda update --yes conda 14 | 15 | install: 16 | 17 | - conda env create --file environment.yaml 18 | - source activate mkdocs 19 | 20 | script: 21 | 22 | - bash _scripts/test.sh 23 | 24 | before_deploy: 25 | 26 | - pip install awscli --upgrade 27 | - bash _scripts/configure-deploy.sh --clobber 28 | 29 | deploy: 30 | - provider: script 31 | script: bash _scripts/deploy.sh --public --verbose production 32 | skip_cleanup: true 33 | on: 34 | repo: aws-samples/aws-genomics-workflows 35 | branch: release 36 | tags: true 37 | - provider: script 38 | script: bash _scripts/deploy.sh --public --verbose test 39 | skip_cleanup: true 40 | on: 41 | repo: aws-samples/aws-genomics-workflows 42 | branch: master -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws-samples/genomics-workflows/issues), or [recently closed](https://github.com/aws-samples/genomics-workflows/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/genomics-workflows/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws-samples/genomics-workflows/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS 2 | 3 | :warning: This site and related code are no longer actively maintained as of 2023-07-31. :warning: 4 | 5 | This allows all code and assets presented here to remain publicly available for historical reference purposes only. 6 | 7 | For more up to date solutions to running Genomics workflows on AWS checkout: 8 | 9 | - [Amazon Omics](https://aws.amazon.com/omics/) - a fully managed service for storing, processing, and querying genomic, transcriptomic, and other omics data into insights. [Omics Workflows](https://docs.aws.amazon.com/omics/latest/dev/workflows.html) provides fully managed execution of pre-packaged [Ready2Run](https://docs.aws.amazon.com/omics/latest/dev/service-workflows.html) workflows or private workflows you create using WDL or Nextflow. 10 | - [Amazon Genomics CLI](https://aws.amazon.com/genomics-cli/) - an open source tool that automates deploying and running workflow engines in AWS. AGC uses the same architectural patterns described here (i.e. operating workflow engines with AWS Batch). It provides support for running WDL, Nextflow, Snakemake, and CWL based workflows. 11 | 12 | --- 13 | 14 | This repository is the source code for [Genomics Workflows on AWS](). It contains markdown documents that are used to build the site as well as source code (CloudFormation templates, scripts, etc) that can be used to deploy AWS infrastructure for running genomics workflows. 15 | 16 | If you want to get the latest version of these solutions up and running quickly, it is recommended that you deploy stacks using the launch buttons available via the [hosted guide](). 17 | 18 | If you want to customize these solutions, you can create your own distribution using the instructions below. 19 | 20 | ## Creating your own distribution 21 | 22 | Clone the repo 23 | 24 | ```bash 25 | git clone https://github.com/aws-samples/aws-genomics-workflows.git 26 | ``` 27 | 28 | Create an S3 bucket in your AWS account to use for the distribution deployment 29 | 30 | ```bash 31 | aws s3 mb 32 | ``` 33 | 34 | Create and deploy a distribution from source 35 | 36 | ```bash 37 | cd aws-genomics-workflows 38 | bash _scripts/deploy.sh --deploy-region --asset-profile --asset-bucket s3:// test 39 | ``` 40 | 41 | This will create a `dist` folder in the root of the project with subfolders `dist/artifacts` and `dist/templates` that will be uploaded to the S3 bucket you created above. 42 | 43 | Use `--asset-profile` option to specify an AWS profile to use to make the deployment. 44 | 45 | **Note**: the region set for `--deploy-region` should match the region the bucket `` is created in. 46 | 47 | You can now use your deployed distribution to launch stacks using the AWS CLI. 
For example, to launch the GWFCore stack: 48 | 49 | ```bash 50 | TEMPLATE_ROOT_URL=https://.s3-.amazonaws.com/test/templates 51 | 52 | aws cloudformation create-stack \ 53 | --region \ 54 | --stack-name \ 55 | --template-url $TEMPLATE_ROOT_URL/gwfcore/gwfcore-root.template.yaml \ 56 | --capabilities CAPABILITY_IAM CAPABILITY_AUTO_EXPAND \ 57 | --parameters \ 58 | ParameterKey=VpcId,ParameterValue= \ 59 | ParameterKey=SubnetIds,ParameterValue=\",,...\" \ 60 | ParameterKey=ArtifactBucketName,ParameterValue= \ 61 | ParameterKey=TemplateRootUrl,ParameterValue=$TEMPLATE_ROOT_URL \ 62 | ParameterKey=S3BucketName,ParameterValue= \ 63 | ParameterKey=ExistingBucket,ParameterValue=false 64 | 65 | ``` 66 | 67 | ## Shared File System Support 68 | 69 | Amazon EFS is supported out of the box for `GWFCore` and `Nextflow`. You have two options to use EFS. 70 | 71 | 1. **Create a new EFS File System:** Be sure to have `CreateEFS` set to `Yes` and also include the total number of subnets. 72 | 2. **Use an Existing EFS File System:** Be sure to specify the EFS ID in the `ExistingEFS` parameter. This file system should be accessible from every subnet you specify. 73 | 74 | Following successful deployment of `GWFCore`, when creating your Nextflow Resources, set `MountEFS` to `Yes`. 75 | 76 | ## Building the documentation 77 | 78 | The documentation is built using mkdocs. 79 | 80 | Install dependencies: 81 | 82 | ```bash 83 | $ conda env create --file environment.yaml 84 | ``` 85 | 86 | This will create a `conda` environment called `mkdocs` 87 | 88 | Build the docs: 89 | 90 | ```bash 91 | $ conda activate mkdocs 92 | $ mkdocs build 93 | ``` 94 | 95 | ## License Summary 96 | 97 | This library is licensed under the MIT-0 License. See the LICENSE file. 98 | -------------------------------------------------------------------------------- /_scripts/configure-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a default ~/.aws/configure file for Travis testing 4 | 5 | set -e 6 | 7 | # This script expects the following environment variable(s) 8 | # ASSET_ROLE_ARN: the AWS role ARN that is used to publish assets 9 | 10 | usage() { 11 | cat <&2 37 | exit 1 38 | ;; 39 | *) # positional agruments 40 | PARAMS="$PARAMS $1" 41 | shift 42 | ;; 43 | esac 44 | done 45 | eval set -- "$PARAMS" 46 | 47 | if [ -z $CLOBBER ]; then 48 | while true; do 49 | read -p "Overwrite ~/.aws/config file [y/n]? " yn 50 | case $yn in 51 | [Yy]* ) CLOBBER=1; break;; 52 | [Nn]* ) echo "Exiting"; exit;; 53 | * ) echo "Please answer yes or no.";; 54 | esac 55 | done 56 | fi 57 | 58 | mkdir -p $HOME/.aws 59 | cat << EOF > $HOME/.aws/config 60 | [default] 61 | region = us-east-1 62 | output = json 63 | 64 | [profile asset-publisher] 65 | region = us-east-1 66 | role_arn = ${ASSET_ROLE_ARN} 67 | credential_source = Environment 68 | EOF 69 | 70 | cat $HOME/.aws/config -------------------------------------------------------------------------------- /_scripts/make-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make-dist.sh: Create distribution artifacts 4 | # This script is expected to be in a subdirectory of the top-level directory 5 | # It accesses the subdirectory 'src', and creates a subdirectory 'dist', in the top-level directory: 6 | # . 
7 | # |-_scripts 8 | # |---make-dist.sh 9 | # |-dist 10 | # |-src 11 | 12 | 13 | VERBOSE="" 14 | PARAMS="" 15 | while (( "$#" )); do 16 | case "$1" in 17 | --verbose) 18 | VERBOSE='-v' 19 | shift 20 | ;; 21 | --) # end optional argument parsing 22 | shift 23 | break 24 | ;; 25 | -*|--*=) 26 | echo "Error: unsupported argument $1" >&2 27 | exit 1 28 | ;; 29 | *) # positional agruments 30 | PARAMS="$PARAMS $1" 31 | shift 32 | ;; 33 | esac 34 | done 35 | eval set -- "$PARAMS" 36 | 37 | echo "checking for dependencies" 38 | 39 | DEPENDENCIES=$(cat < /dev/null && pwd )" 63 | INSTALL_DIR=$(dirname $DIR) 64 | SOURCE_PATH=$INSTALL_DIR/src 65 | DIST_PATH=$INSTALL_DIR/dist 66 | 67 | TEMP_PATH=$DIST_PATH/tmp 68 | ARTIFACT_PATH=$DIST_PATH/artifacts 69 | TEMPLATES_PATH=$DIST_PATH/templates 70 | 71 | if [ ! -d $DIST_PATH ]; then 72 | mkdir -p $DIST_PATH 73 | fi 74 | 75 | cd $DIST_PATH 76 | 77 | # clean up previous dist build 78 | echo "removing previous dist in $DIST_PATH" 79 | [ ! -z $DIR ] && rm -rf $DIST_PATH/* 80 | 81 | for d in $TEMP_PATH $ARTIFACT_PATH $TEMPLATES_PATH; do 82 | if [ ! -d $d ]; 83 | then 84 | echo "creating $d" 85 | mkdir -p $d 86 | fi 87 | done 88 | 89 | # package ebs-autoscale 90 | # combines the latest release of amazon-ebs-autoscale with compatibility shim 91 | # scripts in ./ebs-autoscale/ 92 | echo "packaging amazon-ebs-autoscale" 93 | cd $TEMP_PATH 94 | 95 | RESPONSE=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest") 96 | EBS_AUTOSCALE_VERSION=$(echo $RESPONSE | jq -r .tag_name) 97 | if [[ $EBS_AUTOSCALE_VERSION = 'null' ]]; then 98 | echo "ERROR: $RESPONSE" 99 | exit 1 100 | fi 101 | curl --silent -L \ 102 | "https://github.com/awslabs/amazon-ebs-autoscale/archive/${EBS_AUTOSCALE_VERSION}.tar.gz" \ 103 | -o ./amazon-ebs-autoscale.tar.gz 104 | 105 | echo "copying $(tar -tzf ./amazon-ebs-autoscale.tar.gz | wc -l) files from ebs-autoscale $EBS_AUTOSCALE_VERSION into tmp/amazon-ebs-autoscale/" 106 | tar $VERBOSE -xzf ./amazon-ebs-autoscale.tar.gz 107 | mv ./amazon-ebs-autoscale*/ ./amazon-ebs-autoscale 108 | echo $EBS_AUTOSCALE_VERSION > ./amazon-ebs-autoscale/VERSION 109 | 110 | echo "copying src/ebs-autoscale with $(find $SOURCE_PATH/ebs-autoscale/ -type f | wc -l) files to tmp/" 111 | cp $VERBOSE -Rf $SOURCE_PATH/ebs-autoscale . 112 | echo "copying $(find amazon-ebs-autoscale -type f | wc -l) files from tmp/amazon-ebs-autoscale/ to tmp/ebs-autoscale/" 113 | cp $VERBOSE -Rf ./amazon-ebs-autoscale/* ./ebs-autoscale/ 114 | echo "creating artifacts/aws-ebs-autoscale.tgz with $(find ./ebs-autoscale/ -type f | wc -l) files from tmp/ebs-autoscale/" 115 | tar $VERBOSE -czf $ARTIFACT_PATH/aws-ebs-autoscale.tgz ./ebs-autoscale/ 116 | 117 | # add a copy of the release tarball for naming consistency 118 | echo "creating artifacts/amazon-ebs-autoscale.tgz with $(find ./amazon-ebs-autoscale/ -type f | wc -l) files from tmp/amazon-ebs-autoscale/" 119 | tar $VERBOSE -czf $ARTIFACT_PATH/amazon-ebs-autoscale.tgz ./amazon-ebs-autoscale 120 | 121 | # add a retrieval script 122 | cp $VERBOSE -f $SOURCE_PATH/ebs-autoscale/get-amazon-ebs-autoscale.sh $ARTIFACT_PATH 123 | 124 | # package crhelper lambda(s) 125 | cd $SOURCE_PATH/lambda 126 | for fn in `ls .`; do 127 | echo "packaging crhelper lambda $fn" 128 | mkdir -p $TEMP_PATH/lambda/$fn 129 | cp $VERBOSE -R $SOURCE_PATH/lambda/$fn/. $TEMP_PATH/lambda/$fn 130 | 131 | cd $TEMP_PATH/lambda/$fn 132 | [ -z $VERBOSE ] && P_QUIET='--quiet' || P_QUIET='' 133 | pip $P_QUIET install -t . 
-r requirements.txt 134 | echo "creating artifacts/lambda-${fn}.zip with $(find . -type f | wc -l) files" 135 | [ -z $VERBOSE ] && Z_QUIET='-q' || Z_QUIET='' 136 | zip $Z_QUIET -r $ARTIFACT_PATH/lambda-$fn.zip . 137 | done 138 | 139 | # package ecs-additions 140 | echo "packaging ecs-additions" 141 | 142 | cd $TEMP_PATH 143 | mkdir -p $TEMP_PATH/ecs-additions 144 | cp $VERBOSE -R $SOURCE_PATH/ecs-additions/. $TEMP_PATH/ecs-additions 145 | 146 | # add the amazon-ebs-autoscale retrieval script to additions 147 | cp $VERBOSE $SOURCE_PATH/ebs-autoscale/get-amazon-ebs-autoscale.sh $TEMP_PATH/ecs-additions 148 | 149 | # keep tarball for backwards compatibilty 150 | cd $TEMP_PATH 151 | tar $VERBOSE -czf $ARTIFACT_PATH/aws-ecs-additions.tgz ./ecs-additions/ 152 | 153 | # zip file for codecommit repo 154 | cd $TEMP_PATH/ecs-additions/ 155 | zip $Z_QUIET -r $ARTIFACT_PATH/aws-ecs-additions.zip ./* 156 | 157 | 158 | # package container code 159 | echo "packaging container definitions with $(find $SOURCE_PATH/containers -type f | wc -l) files" 160 | cd $SOURCE_PATH/containers 161 | zip $Z_QUIET -r $ARTIFACT_PATH/containers.zip ./* 162 | 163 | 164 | # add templates to dist 165 | echo "copying $(find $SOURCE_PATH/templates/ -type f | wc -l) cloudformation templates" 166 | cp $VERBOSE -R $SOURCE_PATH/templates/. $TEMPLATES_PATH 167 | 168 | 169 | # cleanup 170 | echo "removing temp files" 171 | rm -rf $TEMP_PATH 172 | 173 | cd $CWD -------------------------------------------------------------------------------- /_scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # check cfn templates for errors 6 | cfn-lint --version 7 | cfn-lint src/templates/**/*.template.yaml 8 | 9 | # make sure that site can build 10 | mkdocs build -------------------------------------------------------------------------------- /docs/containers/container-examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/containers/container-examples.md -------------------------------------------------------------------------------- /docs/containers/container-introduction.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/containers/container-introduction.md -------------------------------------------------------------------------------- /docs/core-env/build-custom-distribution.md: -------------------------------------------------------------------------------- 1 | # Building Custom Resources 2 | 3 | {{ deprecation_notice() }} 4 | 5 | This section describes how to build and upload templates and artifacts to use in a customized deployment. Once uploaded, the locations of the templates and artifacts are used when deploying the Nextflow on AWS Batch solution (see [Customized Deployment](custom-deploy.md)) 6 | 7 | ## Building a Custom Distribution 8 | 9 | This step involves building a distribution of templates and artifacts from the solution's source code. 10 | 11 | First, create a local clone of the [Genomics Workflows on AWS](https://github.com/aws-samples/aws-genomics-workflows) source code. 
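For example, using the same commands shown in the project README:

```bash
# Clone the source and change into the project directory
git clone https://github.com/aws-samples/aws-genomics-workflows.git
cd aws-genomics-workflows
```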
The code base contains several directories: 12 | 13 | * `_scripts/`: Shell scripts for building and uploading the customized distribution of templates and artifacts 14 | * `docs/`: Source code for the documentation, written in [MarkDown](https://markdownguide.org) for the [MkDocs](https://mkdocs.org) publishing platform. This documentation may be modified, expanded, and contributed in the same way as source code. 15 | * `src/`: Source code for the components of the solution: 16 | * `containers/`: CodeBuild buildspec files for building AWS-specific container images and pushing them to ECR 17 | * `_common/` 18 | * `build.sh`: A generic build script that first builds a base image for a container, then builds an AWS specific image 19 | * `entrypoint.aws.sh`: A generic entrypoint script that wraps a call to a binary tool in the container with handlers data staging from/to S3 20 | * `nextflow/` 21 | * `Dockerfile` 22 | * `nextflow.aws.sh`: Docker entrypoint script to execute the Nextflow workflow on AWS Batch 23 | * `ebs-autoscale/` 24 | * `get-amazon-ebs-autoscale.sh`: Script to retrieve and install [Amazon EBS Autoscale](https://github.com/awslabs/amazon-ebs-autoscale) 25 | * `ecs-additions/`: Scripts to be installed on ECS host instances to support the distribution 26 | * `awscli-shim.sh`: Installed as `/opt/aws-cli/bin/aws` and mounted onto the container, allows container images without full glibc to use the AWS CLI v2 through supplied shared libraries (especially libz) and `LD_LIBRARY_PATH`. 27 | * `ecs-additions-common.sh`: Utility script to install `fetch_and_run.sh`, Nextflow and Cromwell shims, and swap space 28 | * `ecs-additions-cromwell-linux2-worker.sh`: 29 | * `ecs-additions-cromwell.sh`: 30 | * `ecs-additions-nextflow.sh`: 31 | * `ecs-additions-step-functions.sh`: 32 | * `fetch_and_run.sh`: Uses AWS CLI to download and run scripts and zip files from S3 33 | * `provision.sh`: Appended to the userdata in the launch template created by [gwfcore-launch-template](custom-deploy.md): Starts SSM Agent, ECS Agent, Docker; runs `get-amazon-ebs-autoscale.sh`, `ecs-additions-common.sh` and orchestrator-specific `ecs-additions-` scripts. 34 | * `lambda/`: Lambda functions to create, modify or delete ECR registries or CodeBuild jobs 35 | * `templates/`: CloudFormation templates for the solution stack, as described in [Customized Deployment](custom-deploy.md) 36 | 37 | ## Deploying a Custom Distribution 38 | 39 | The script `_scripts/deploy.sh` will create a custom distribution of artifacts and templates from files in the source tree, then upload this distribution to an S3 bucket. It will optionally also build and deploy a static documentation site from the Markdown documentation files. Its usage is: 40 | 41 | ```sh 42 | deploy.sh [--site-bucket BUCKET] [--asset-bucket BUCKET] 43 | [--asset-profile PROFILE] [--deploy-region REGION] 44 | [--public] [--verbose] 45 | STAGE 46 | 47 | --site-bucket BUCKET Deploy documentation site to BUCKET 48 | --asset-bucket BUCKET Deploy assets to BUCKET 49 | --asset-profile PROFILE Use PROFILE for AWS CLI commands 50 | --deploy-region REGION Deploy in region REGION 51 | --public Deploy to public bucket with '--acl public-read' (Default false) 52 | --verbose Display more output 53 | STAGE 'test' or 'production' 54 | ``` 55 | 56 | When running this script from the command line, use the value `test` for the stage. 
This will deploy the templates and artifacts into a directory `test` in your deployment bucket: 57 | 58 | ``` 59 | $ aws s3 ls s3://my-deployment-bucket/test/ 60 | PRE artifacts/ 61 | PRE templates/ 62 | ``` 63 | 64 | Use these values when deploying a customized installation, as described in [Customized Deployment](custom-deploy.md), sections 'Artifacts and Nested Stacks' and 'Nextflow'. In the example above, the values to use would be: 65 | 66 | * Artifact S3 Bucket Name: `my-deployment-bucket` 67 | * Artifact S3 Prefix: `test/artifacts` 68 | * Template Root URL: `https://my-deployment-bucket.s3.amazonaws.com/test/templates` 69 | 70 | The use of `production` for the stage is reserved for deployments from a Travis CI/CD environment; this usage will deploy into a subdirectory named after the current release tag. -------------------------------------------------------------------------------- /docs/core-env/create-s3-bucket.md: -------------------------------------------------------------------------------- 1 | # Core: Data Storage 2 | 3 | {{ deprecation_notice() }} 4 | 5 | You will need a robust location to store your input and output data. Genomics data files often equal or exceed 100GB per file. In addition to input sample files, genomics data processing typically relies on additional items like reference sequences or annotation databases that can be equally large. 6 | 7 | The following are key criteria for storing data for genomics workflows: 8 | 9 | * accessible to compute 10 | * secure 11 | * durable 12 | * capable of handling large files 13 | 14 | Amazon S3 buckets meet all of the above conditions. S3 also makes it easy to collaboratively work on such large datasets because buckets and the data stored in them are globally available. 15 | 16 | You can use an S3 bucket to store both your input data and workflow results. 17 | 18 | ## Create an S3 Bucket 19 | 20 | You can use an existing bucket for your workflows, or you can create a new one using the methods below. 21 | 22 | ### Automated via CloudFormation 23 | 24 | | Name | Description | Source | Launch Stack | 25 | | -- | -- | :--: | :--: | 26 | {{ cfn_stack_row("Amazon S3 Bucket", "GWFCore-S3", "gwfcore/gwfcore-s3.template.yaml", "Creates a secure Amazon S3 bucket to read from and write results to.", enable_cfn_button=False) }} 27 | 28 | !!! info 29 | The launch button has been disabled above since this template is part of a set of nested templates. It is not recommended to launch it independently of its intended parent stack. 30 | 31 | ### Manually via the AWS Console 32 | 33 | * Go to the S3 Console 34 | * Click on the "Create Bucket" button 35 | 36 | In the dialog that opens: 37 | 38 | * Provide a "Bucket Name". This needs to be globally unique. 39 | 40 | * Select the region for the bucket. Buckets are globally accessible, but the data resides on physical hardware within a specific region. It is best to choose a region that is closest to where you are and where you will launch compute resources to reduce network latency and avoid inter-region transfer costs. 41 | 42 | The default options for bucket configuration are sufficient for the majority of use cases. 43 | 44 | * Click the "Create" button to accept defaults and create the bucket.
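If you prefer to create the bucket from the command line, the following AWS CLI sketch creates a bucket with comparable settings. The bucket name and region are placeholders, and the encryption and public-access settings shown are illustrative; they approximate, but are not guaranteed to match, what the `gwfcore-s3` CloudFormation template configures.

```bash
# Placeholder values -- replace with your own globally unique bucket name and region
BUCKET=my-genomics-results-bucket
REGION=us-west-2

# Create the bucket
# (--create-bucket-configuration is required for all regions except us-east-1)
aws s3api create-bucket \
    --bucket "$BUCKET" \
    --region "$REGION" \
    --create-bucket-configuration LocationConstraint="$REGION"

# Turn on default (SSE-S3) encryption for all new objects
aws s3api put-bucket-encryption \
    --bucket "$BUCKET" \
    --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}'

# Block all public access to the bucket
aws s3api put-public-access-block \
    --bucket "$BUCKET" \
    --public-access-block-configuration BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true
```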
45 | -------------------------------------------------------------------------------- /docs/core-env/custom-deploy.md: -------------------------------------------------------------------------------- 1 | # Customized Deployment 2 | 3 | {{ deprecation_notice() }} 4 | 5 | Deployments of the 'Nextflow on AWS Batch' solution are based on nested CloudFormation templates, and on artifacts comprising scripts, software packages, and configuration files. The templates and artifacts are stored in S3 buckets, and their S3 URLs are used when launching the top-level template and as parameters to that template's deployment. 6 | 7 | ## VPC 8 | The quick start link deploys the [AWS VPC Quickstart](https://aws.amazon.com/quickstart/architecture/vpc/), which creates a VPC with up to 4 Availability Zones, each with a public subnet and a private subnet with NAT Gateway access to the Internet. 9 | 10 | ## Genomics Workflow Core 11 | This quick start link deploys the CloudFormation template `gwfcore-root.template.yaml` for the Genomics Workflow Core (GWFCore) from the [Genomics Workflows on AWS](https://github.com/aws-samples/aws-genomics-workflows) solution. This template launches a number of nested templates, as shown below: 12 | 13 | * Root Stack __gwfcore-root__ - Top level template for Genomics Workflow Core 14 | * S3 Stack __gwfcore-s3__ - S3 bucket (new or existing) for storing analysis results 15 | * IAM Stack __gwfcore-iam__ - Creates IAM roles to use with AWS Batch scalable genomics workflow environment 16 | * Code Stack __gwfcore-code__ - Creates AWS CodeCommit repos and CodeBuild projects for Genomics Workflows Core assets and artifacts 17 | * Launch Template Stack __gwfcore-launch-template__ - Creates an EC2 Launch Template for AWS Batch based genomics workflows 18 | * Batch Stack __gwfcore-batch__ - Deploys resource for a AWS Batch environment that is suitable for genomics, including default and high-priority JobQueues 19 | 20 | ### Root Stack 21 | The quick start solution links to the CloudFormation console, where the 'Amazon S3 URL' field is prefilled with the S3 URL of a copy of the root stack template, hosted in the public S3 bucket __aws-genomics-workflows__. 22 | 23 | custom-deploy-0 27 | 28 | To use a customized root stack, upload your modified stack template to an S3 bucket (see [Building a Custom Distribution](build-custom-distribution.md)), and specify that template's URL in 'Amazon S3 URL'. 29 | 30 | ### Artifacts and Nested Stacks 31 | The subsequent screen, 'Specify Stack Details', allows for customization of the deployed resources in the 'Distribution Configuration' section. 32 | 33 | custom-deploy-1 37 | 38 | * __Artifact S3 Bucket Name__ and __Artifact S3 Prefix__ define the location of the artifacts uploaded prior to this deployment. By default, pre-prepared artifacts are stored in the __aws-genomics-workflows__ bucket. 39 | * __Template Root URL__ defines the bucket and prefix used to store nested templates, called by the root template. 40 | 41 | To use your own modified artifacts or nested templates, build and upload as described in [Building a Custom Distribution](build-custom-distribution.md), and specify the bucket and prefix in the fields above. 
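If you prefer to supply these overrides from the AWS CLI instead of the console, the same fields map to stack parameters. The following is an illustrative sketch only: it assumes a custom distribution deployed with stage `test` (see [Building a Custom Distribution](build-custom-distribution.md)), reuses the parameter names from the launch example in the project README, and all region, bucket, VPC, and subnet values are placeholders you must replace with your own.

```bash
# Placeholder values -- point these at your own custom distribution and network
ASSET_BUCKET=my-deployment-bucket
TEMPLATE_ROOT_URL=https://${ASSET_BUCKET}.s3.amazonaws.com/test/templates

aws cloudformation create-stack \
    --region us-west-2 \
    --stack-name my-gwfcore \
    --template-url ${TEMPLATE_ROOT_URL}/gwfcore/gwfcore-root.template.yaml \
    --capabilities CAPABILITY_IAM CAPABILITY_AUTO_EXPAND \
    --parameters \
        ParameterKey=VpcId,ParameterValue=vpc-0123456789abcdef0 \
        ParameterKey=SubnetIds,ParameterValue=\"subnet-11111111,subnet-22222222\" \
        ParameterKey=ArtifactBucketName,ParameterValue=${ASSET_BUCKET} \
        ParameterKey=TemplateRootUrl,ParameterValue=${TEMPLATE_ROOT_URL} \
        ParameterKey=S3BucketName,ParameterValue=my-gwfcore-results-bucket \
        ParameterKey=ExistingBucket,ParameterValue=false
```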
42 | 43 | ## Workflow Orchestrators 44 | ### Nextflow 45 | This quick start deploys the Nextflow template `nextflow-resources.template.yaml`, which launches one nested stack: 46 | 47 | * Root Stack __nextflow-resources__ - Creates resources specific to running Nextflow on AWS 48 | * Container Build Stack __container-build__ - Creates resources for building a Docker container image using CodeBuild, storing the image in ECR, and optionally creating a corresponding Batch Job Definition 49 | 50 | The nextflow root stack is specified in the same way as the GWFCore root stack, above, and a location for a modified root stack may be specified as with the Core stack. 51 | 52 | The subsequent 'Specify Stack Details' screen has fields allowing the customization of the Nextflow deployment. 53 | 54 | nextflow-0 58 | 59 | * __S3NextflowPrefix__, __S3LogsDirPrefix__, and __S3WorkDirPrefix__ specify the path within the GWFCore bucket in which to store per-run data and log files. 60 | * __TemplateRootUrl__ specifies the path to the nested templates called by the Nextflow root template, as with the GWFCore root stack. 61 | -------------------------------------------------------------------------------- /docs/core-env/images/aws-genomics-workflows-high-level-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/core-env/images/aws-genomics-workflows-high-level-arch.png -------------------------------------------------------------------------------- /docs/core-env/images/ebs-autoscale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/core-env/images/ebs-autoscale.png -------------------------------------------------------------------------------- /docs/core-env/introduction.md: -------------------------------------------------------------------------------- 1 | # Core: Introduction 2 | 3 | {{ deprecation_notice() }} 4 | 5 | A high level view of the architecture you will need to run workflows is shown is below. 6 | 7 | ![high level architecture](images/aws-genomics-workflows-high-level-arch.png) 8 | 9 | This section of the guide details the common components required for job execution and data storage. This includes the following: 10 | 11 | * A place to store your input data and generated results 12 | * Access controls to your data and compute resources 13 | * Code and artifacts used to provision compute resources 14 | * Containerized task scheduling and execution 15 | 16 | The above is referred to here as the "Genomics Workflows Core". To launch this core in your AWS account, use the Cloudformation template below. 17 | 18 | | Name | Description | Source | Launch Stack | 19 | | -- | -- | :--: | :--: | 20 | {{ cfn_stack_row("Genomics Workflow Core", "gwfcore", "gwfcore/gwfcore-root.template.yaml", "Create EC2 Launch Templates, AWS Batch Job Queues and Compute Environments, a secure Amazon S3 bucket, and IAM policies and roles within an **existing** VPC. _NOTE: You must provide VPC ID, and subnet IDs_.") }} 21 | 22 | The core is agnostic of the workflow orchestrator you intended to use, and can be installed multiple times in your account if needed (e.g. for use by different projects). Each installation uses a `Namespace` value to group resources accordingly. 
By default, the `Namespace` is set to the stack name, which must be unique within an AWS region. 23 | 24 | !!! info 25 | To create all of the resources described, the Cloudformation template above uses [Nested Stacks](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-nested-stacks.html). This is a way to modularize complex stacks and enable reuse. The individual nested stack templates are intended to be run from a parent or "root" template. On the following pages, the individual nested stack templates are available for viewing only. 26 | -------------------------------------------------------------------------------- /docs/disclaimer.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The architectures and solutions presented in this guide are provided "as is" per the underlying [LICENSE](https://github.com/aws-samples/aws-genomics-workflows/blob/master/LICENSE). Before implementing anything described here in a production setting we recommended that you consult with your AWS account team regarding your specific requirements for performance, scalability, and security via a [Well Architected Review](https://aws.amazon.com/architecture/well-architected/). -------------------------------------------------------------------------------- /docs/extra.css: -------------------------------------------------------------------------------- 1 | .launch-button { 2 | display: block; 3 | 4 | color: white !important; 5 | background-color: #ec7211; 6 | 7 | border-radius: .25ex .25ex .25ex .25ex; 8 | border: 1px solid #eb5f07; 9 | 10 | text-align: center; 11 | text-decoration: none; 12 | 13 | /* padding: 0.5ex 2ex; */ 14 | width: 64px; 15 | 16 | line-height: 0.5; 17 | } 18 | 19 | .launch-button-disabled { 20 | opacity: 0.2; 21 | cursor: not-allowed; 22 | } 23 | 24 | .launch-button:hover, .launch-button:visited:hover { 25 | background-color: #eb5f07; 26 | color: white !important; 27 | } 28 | 29 | .launch-button:visited { 30 | color: white !important; 31 | } 32 | 33 | .material-icons { 34 | font-family: 'Material Icons'; 35 | font-weight: normal; 36 | font-style: normal; 37 | font-size: 24px; /* Preferred icon size */ 38 | display: inline-block; 39 | line-height: 1; 40 | text-transform: none; 41 | letter-spacing: normal; 42 | word-wrap: normal; 43 | white-space: nowrap; 44 | direction: ltr; 45 | 46 | /* Support for all WebKit browsers. */ 47 | -webkit-font-smoothing: antialiased; 48 | /* Support for Safari and Chrome. */ 49 | text-rendering: optimizeLegibility; 50 | 51 | /* Support for Firefox. */ 52 | -moz-osx-font-smoothing: grayscale; 53 | 54 | /* Support for IE. 
*/ 55 | font-feature-settings: 'liga'; 56 | } 57 | 58 | .md-header, .md-footer, .md-footer-nav, .md-footer-meta { 59 | background-color: #232f3e !important; 60 | } 61 | 62 | .screenshot { 63 | style: "float: left"; 64 | margin: 10px; 65 | border: 1px solid lightgrey; 66 | } -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_Full-Color -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB_REV.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_White-Color -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB_WHT.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_White -------------------------------------------------------------------------------- /docs/images/aws-genomics-workflows-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/aws-genomics-workflows-banner.png -------------------------------------------------------------------------------- /docs/images/cloudformation-launch-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/cloudformation-launch-stack.png -------------------------------------------------------------------------------- /docs/images/custom-deploy-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/custom-deploy-0.png -------------------------------------------------------------------------------- /docs/images/custom-deploy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/custom-deploy-1.png -------------------------------------------------------------------------------- /docs/images/genomics-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/genomics-workflow.png -------------------------------------------------------------------------------- /docs/images/nextflow-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/nextflow-0.png -------------------------------------------------------------------------------- /docs/images/root-vpc-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-1.png -------------------------------------------------------------------------------- /docs/images/root-vpc-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-2.png -------------------------------------------------------------------------------- /docs/images/root-vpc-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-3.png -------------------------------------------------------------------------------- /docs/images/root-vpc-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-4.png -------------------------------------------------------------------------------- /docs/images/root-vpc-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-5.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS 2 | 3 | {{ deprecation_notice() }} 4 | 5 | ![banner](images/aws-genomics-workflows-banner.png) 6 | 7 | ## Introduction 8 | 9 | Welcome! 10 | 11 | This guide walks through how to use [Amazon Web Services](https://aws.amazon.com) ([AWS](https://aws.amazon.com)), such as [Amazon S3](https://aws.amazon.com/s3) and [AWS Batch](https://aws.amazon.com/batch), to run large scale genomics analyses. 12 | 13 | Here you will learn how to: 14 | 15 | 1. Use S3 buckets to stage large genomics datasets as inputs and outputs from analysis pipelines 16 | 2. Create job queues in AWS Batch to use for scalable parallel job execution 17 | 3. Orchestrate individual jobs into analysis workflows using native AWS services like [AWS Step Functions](https://aws.amazon.com/step-functions) and 3rd party workflow engines 18 | 19 | If you're impatient and want to get something up and running immediately, head 20 | straight to the [Quick Start](quick-start) section. Otherwise, continue on for the full details. 21 | 22 | ## Prerequisites 23 | 24 | Throughout this guide we'll assume that you: 25 | 26 | 1. Are familiar with the Linux command line 27 | 2. Can use SSH to access a Linux server 28 | 3. Have access to an AWS account 29 | 30 | If you are completely new to AWS, we **highly recommend** going through the following [AWS 10-Minute Tutorials](https://aws.amazon.com/getting-started/tutorials/) that will demonstrate the basics of AWS, as well as set up your development machine for working with AWS. 31 | 32 | 1. **[Launch a Linux Virtual Machine](https://aws.amazon.com/getting-started/tutorials/launch-a-virtual-machine/)** - A tutorial which walks users through the process of starting a host on AWS, and configuring your own computer to connect over SSH. 33 | 2. **[Batch upload files to the cloud](https://aws.amazon.com/getting-started/tutorials/backup-to-s3-cli/)** - A tutorial on using the AWS Command Line Interface (CLI) to access Amazon S3. 34 | 35 | ### AWS Account Access 36 | 37 | AWS has many services that can be used for genomics. 
Here, we will build core architecture with [AWS Batch](https://aws.amazon.com/batch), a managed service that is built on top of other AWS services, such as [Amazon EC2](https://aws.amazon.com/ec2) and [Amazon Elastic Container Service (ECS)](https://aws.amazon.com/ecs). Along the way, we'll leverage some advanced capabilities that need escalated (administrative) privileges to implement. For example, you will need to be able to create [Roles](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) via AWS [Identity and Access Management (IAM)](https://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html), a service that helps you control who is authenticated (signed in) and authorized (has permissions) to use AWS resources. 38 | 39 | !!! tip 40 | We **strongly** recommend following the [IAM Security Best Practices](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) for securing your root AWS account and IAM users. 41 | 42 | !!! note 43 | If you are using an institutional account, it is likely you do not have administrative privileges, i.e. the IAM [AdministratorAccess](https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_managed-vs-inline.html) managed policy is not attached to your IAM User or Role, and you won't be able to attach it yourself. 44 | 45 | If this is the case, you will need to work with your account administrator to get things set up for you. Refer them to this guide, and have them provide you with an [AWS Batch Job Queue ARN](https://docs.aws.amazon.com/batch/latest/userguide/job_queues.html), and an [Amazon S3 Bucket](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html) that you can write results to. 46 | 47 | ## Contribution 48 | 49 | This site is a living document, created for and by the genomics community at AWS and around the world. We encourage you to contribute new content and make improvements to existing content via pull request to the [GitHub repo](https://github.com/aws-samples/aws-genomics-workflows/) that hosts the source code for this site. 
50 | -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen1.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen2.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen3.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen4.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen5.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ClusterDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ClusterDashboard.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot1.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot1a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot1a.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot2.png -------------------------------------------------------------------------------- 
/docs/orchestration/cost-effective-workflows/images/ScreenShot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot3.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/TaskDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/TaskDashboard.png -------------------------------------------------------------------------------- /docs/orchestration/cromwell/cromwell-trouble-shooting.md: -------------------------------------------------------------------------------- 1 | # Cromwell Troubleshooting 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The following are some common errors that we have seen, along with suggested solutions. 6 | 7 | ## S3 Access Denied (403) 8 | ### Possible Cause(s) 9 | A 403 error from S3 indicates that Cromwell is trying to access an S3 object 10 | that it doesn't have permission to access. 11 | Following the principle of "least access", Cromwell uses an IAM EC2 instance role 12 | that grants it read and write access to the S3 bucket you specified in the 13 | CloudFormation deployment and read-only access to the `gatk-test-data` 14 | and `broad-references` S3 buckets. 15 | If your workflow references other S3 objects (**even ones in your account**), 16 | you will need to allow this via changes to the IAM role. Similarly, if a step in 17 | your workflow attempts to write to another bucket, you will need to add 18 | the appropriate permissions. 19 | 20 | ### Suggested Solution(s) 21 | 22 | * Add read access to additional buckets by attaching a policy to the Cromwell 23 | server's IAM EC2 instance role with content similar to: 24 | 25 | ```JSON 26 | { 27 | "Version": "2012-10-17", 28 | "Statement": [ 29 | { 30 | "Effect": "Allow", 31 | "Action": [ 32 | "s3:GetObject", 33 | "s3:ListBucket" 34 | ], 35 | "Resource": [ 36 | "arn:aws:s3:::bucket-a", 37 | "arn:aws:s3:::bucket-a/*", 38 | "arn:aws:s3:::another-bucket", 39 | "arn:aws:s3:::another-bucket/*" 40 | ] 41 | } 42 | ] 43 | } 44 | ``` 45 | The exact name of the role will be unique and generated by CloudFormation; 46 | however, it will contain the words "CromwellServer", and it will be the role attached 47 | to the EC2 instance running the Cromwell server. 48 | 49 | ## S3 File Not Found (404) 50 | 51 | ### Possible Cause(s) 52 | * A file required by the workflow cannot be found at the 53 | specified S3 path. Your workflow inputs might have an incorrect path, OR an 54 | expected file was not created by the previous step. 55 | 56 | ### Suggested Solution(s) 57 | * Check the paths of your inputs and confirm that the expected file exists at each path. 58 | * If the file name is something like `-rc.txt`, the previous task 59 | failed before it was able to write out the result code. Inspect the `stderr.txt` 60 | and `stdout.txt` of the previous step for possible reasons. 61 | 62 | ## Cromwell Server OutOfMemory errors 63 | 64 | ### Possible Cause(s) 65 | 66 | * Out of memory errors on the Cromwell Server are typically the result of the JVM running 67 | out of memory while attempting to keep track of multiple workflows or workflows with very 68 | large scatter steps.
69 | 70 | ### Suggested Solution(s) 71 | 72 | * Consider upgrading the server instance type to one with more RAM. 73 | * Investigate tuning [Cromwell's `job-control` limits](https://github.com/broadinstitute/cromwell/blob/9249537fd094c6979b0c64e99fcc90d48c861487/core/src/main/resources/reference.conf#L543-L572) 74 | to find a configuration that appropriately restricts the number of queued Akka messages. 75 | * Consider increasing the maximum instance RAM available to the JVM. Our CloudFormation templates 76 | set this to 85% (`-XX:MaxRAMPercentage=85.0`), allowing some headroom for the OS. 77 | On larger instance types you may be able to increase this further. 78 | * Ensure you are *not* using an in-memory database on the server instance. Our CloudFormation templates configure 79 | a separate Aurora MySQL cluster to avoid this. 80 | 81 | ## Cromwell Task (Container) OutOfMemory errors 82 | 83 | ### Possible Cause(s) 84 | 85 | * Individual tasks from a workflow run in Docker containers on AWS Batch. If those containers 86 | have insufficient RAM for the task, they can fail. 87 | * Some older applications (including older versions of the JVM) do not always respect the memory 88 | limits imposed by the container and may assume they have resources they cannot actually use. 89 | 90 | ### Suggested Solution(s) 91 | 92 | * Assign more memory to the task in the `runtime: {}` stanza of the WDL or, if the task application 93 | allows, use command line or configuration parameters to appropriately limit memory. 94 | * For tasks executed by the JVM, investigate the `-Xmx` and `-XX:MaxRAMPercentage` parameters. 95 | 96 | ## Cromwell-submitted AWS Batch jobs hang in 'Runnable' state 97 | 98 | ### Possible Cause(s) 99 | * The resources requested by the task exceed those of the largest instance type available in your AWS Batch Compute Environment 100 | * Batch worker EC2 instances are not able to join the Compute Environment's ECS cluster 101 | 102 | ### Suggested Solution(s) 103 | * Reduce the resources required by your task to less than the maximum CPU and memory of the largest instance type allowed 104 | in your Batch Compute Environment. 105 | * In your EC2 console, determine whether any gwf-core workers have started. If they have, ensure they have a route to the 106 | internet (for example, does your subnet have a NAT gateway?). Worker nodes require access to the internet so that they can 107 | download required dependencies at startup time. If this process fails, Docker will not start, the 108 | ECS agent will not run, and the Systems Manager agent will not run. In addition, the node will not be able to 109 | communicate with the AWS Batch service.
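If jobs remain in 'Runnable' and you suspect that workers are failing to register with the Compute Environment's ECS cluster, the commands below are one way to check from the CLI. This is a sketch only: the cluster ARN and instance ID are placeholders that you will need to substitute with values from your own account.

```bash
# Find the ECS cluster behind each Batch compute environment
aws batch describe-compute-environments \
    --query 'computeEnvironments[].[computeEnvironmentName,ecsClusterArn,status]' \
    --output table

# Check whether any container instances have registered with that cluster
# (replace the ARN with the ecsClusterArn returned above)
aws ecs list-container-instances \
    --cluster arn:aws:ecs:us-east-1:111122223333:cluster/EXAMPLE-CLUSTER

# If instances are running in EC2 but none are registered, inspect the
# cloud-init and ECS agent logs on a worker, e.g. via SSM Session Manager
aws ssm start-session --target i-0123456789abcdef0
```

An empty container instance list while workers are running in EC2 usually points to the missing internet route or failed provisioning described above.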
-------------------------------------------------------------------------------- /docs/orchestration/cromwell/images/cromwell-on-aws_infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cromwell/images/cromwell-on-aws_infrastructure.png -------------------------------------------------------------------------------- /docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.png -------------------------------------------------------------------------------- /docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.xml: -------------------------------------------------------------------------------- 1 | 7Zxfd6MoFMA/TR6nR0RN8tim7U737J85m9nT2aceoiRhq5KD2KTz6RciGgXTpInaduv0oXKFC3J/XuBepwM4iTa/MLRa/k4DHA5sK9gM4PXAtoHljsQvKXnOJMMxyAQLRgJVaSeYkp84b6mkKQlwUqnIKQ05WVWFPo1j7POKDDFG19VqcxpWe12hBTYEUx+FpvSeBHyppMCydje+YrJYqq5HrroRobyyEiRLFNB1SQRvBnDCKOXZVbSZ4FBOXj4vWbvbPXeLgTEc86MaOFmLJxSm6uH+wBs+D8WgbOs7Sh7Fr1/pLFHD5c/5HNCUhyTGk2KKrQG8WjAUENH1hIaUCVlMY1H9asmjUJSAuFwvCcfTFfKlmrXAQ8jmNObKyMDOy6onqTXhjD7iXOfAhiMBzNiTNUkYan09YcaJMNVlSBaxEHMqu0Cq5IvBYab6qNMYoGSJAzXYRIyTxIvvUsU1FAI1WaILvNk746Cwo3gBMI0wZ8+iimrg5Cwo9uHIzcrrEkkjVWdZgghAJUSK3kWhe2dgcaFsvMfe9kv2/opRkNm7N3dL5gZw2KG5TWtf3k+F4Apxf3mcjbWZEz+3sq99tjeMN9/+04xXVlSYwCqZ8zc0w+E3mhBOqDRkRIIgrLP3jHJOozqTv4JCS6OwBIMiQfjplbwTbRZySbtA68S5WDEapD6/8+UQr1Ysu6jWmW1nuhGUIKyiNDZJqgXJagAkaICk1gYBC0cCGtYNTUUDjaaCsqNoKqA50nuUUDrkvFCyyp52TjZyHPXkMJzQlPlYoSOKdez4+dw+gGYIcjVn5ABz7fFqEPIaIMjcaAxsL+TSZORJXC7k5W4tym4JpaW7NQ1K+NW36KFsDUq7HSiBZ/q1tqB0DSinUC6Pqf+IubgAPUCNAzTbTu7DmvDlA539K/QmzYBkO1WQbKs77zY8AJLdg/RxQSr28B2ANDJAuosTjmIfH3kE7xF6BUKkmNtmDnvDt9tfjc2jXoR+ike2rZvJX/1Zr4OzHvZZMxx5Gkf20HRArR31ANjrgnoP1JoHascBdbmXBmaMwOAFBwuczyRlfEkXNEbhzU5aNgOOg0sZrJfGDKn/KEXhbFvObbrFBzGe11NIiZa3JMz1GIC5jnvlOdImNI2DgqRstHKIL8++eKKtYSt+V4xigVUtp95G5Yhf/qIyHCJOnqo91llBqftGScx39h5amr1dzZDZSFWrch5AU+SAqiLXGVYVZc9nKBITj55L1VayQrJ/wJ4e/dbzEwfqAxX02jGZjWBHaDHhx0FrngA/FrQCTfb8o1z4RxYuxNH2VKJtk+gsbdAJ0kBzYR7QSDwWafuQoj1In0KRZ26++jj7/yXODh1zF9/e7ss8AH56h5S/XhWP1N0i+wVo8QBvfOIq++WQouZcUv6BQYmje8oeVaI3j5bP2Aux9Ws8J3HmGPZW78PtLZ8PAuqnEY6bCm3BN4yR2ubBsjHXlrusA54t94BbYWdHhPxdLPsvtzP3lW+xi6DUeHSa93IOKWrQe9kfHhW8IfxH6TpbBYdnrII1+/LSN2NtY+TpngO6p2Gk78vd4blHzXNAqwtaGCth9q3DkSnpu3iVynFZcly29WfKs3K/hL7REprlhtpZP7tMDdnmKbMxp3js0WCfVzzxrNC4j3RqfCTszEcCLVrlnuwjDylqKBznaPFDZwRf3gK8XP/scFxNHv2DI37ijhGaHA+7w9hraMdoH1LU4I6xxbjJuz5c1JAy7owUPVTm6GmDY0nRExnOkZGRc/MPjortHZt/0PIV5zs8M23/d9J/TNvCLjBNcEPZeY15ALrLsUIzhLL9zxrWNJ1FJEm2sboenYbRYWn84NMoQnHQDEHgLRFqMbTyjtP0+ZtTORl0t05q2/lT0/Q6OIai5jZU8HN+zlH3PQfc8zb3oGQhkYNrUv/p6gdYlFztEyA734J3sSi1+BlOV/H+Iu994Q5KmW9wsiuCNcGsDj/EsZtyRYcUNeiK3kGE9C32NnWgeJ9mzXptDAAOGz3Ti+Lu7zVk1Xd/9QLe/Ac= -------------------------------------------------------------------------------- /docs/orchestration/nextflow/nextflow-trouble-shooting.md: -------------------------------------------------------------------------------- 1 | # Nextflow Troubleshooting 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The following are some common errors that we have seen and suggested solutions 6 | 7 | ## Job Logs say there is an error in AWS CLI while loading shared 
libraries 8 | ### Possible Cause(s) 9 | Nextflow on AWS Batch relies on the process containers being able to use the AWS CLI (which is mounted from the container host). 10 | Very minimal container images such as Alpine do not contain the `glibc` libraries needed by the AWS CLI. 11 | 12 | ### Suggested Solution(s) 13 | 14 | * Modify your image to include or mount these dependencies. 15 | * Use an image (or build from a base) that already contains these, such as `ubuntu:latest`. 16 | 17 | ## AWS credentials not working when set in the environment 18 | ### Possible Cause(s) 19 | You are using a local run of Nextflow with temporary federated or IAM role credentials that use the AWS_SESSION_TOKEN in addition to AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. Nextflow does not look for the AWS_SESSION_TOKEN environment variable, as detailed in [nextflow/issues/2839](https://github.com/nextflow-io/nextflow/issues/2839). 20 | 21 | ### Suggested Solution(s) 22 | * Instead of using local credentials, consider using an IAM role associated with the EC2 instance or ECS container that the Nextflow binary runs from. This does not require setting any local credentials and removes the need to update a session token. 23 | * If you are just using Nextflow locally for testing purposes, you can set credentials in a local `nextflow.config`, which does support a session token: 24 | ``` 25 | aws { 26 | accessKey = 'XXXXXXXXXXXXXXXX' 27 | secretKey = 'XXXXXXXXXXXXXXXX' 28 | sessionToken = 'XXXXXXXXXXXXXXX' 29 | } 30 | ``` 31 | * ***N.B.*** If you set the `sessionToken` in the Nextflow config, it will expire and will need to be updated. The expiry time will depend on how credential generation is configured within your account. 32 | 33 | ## Container start errors 34 | ``` 35 | CannotStartContainerError: Error response from daemon: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: exec: "/usr/local/env-execute": stat /usr/local/env-execute: no such file or directory: un 36 | ``` 37 | ### Possible Cause(s) 38 | Nextflow on AWS Batch relies on the process containers being able to use a number of scripts that are mounted to the container. If references to these are wrong or the scripts do not exist, then the tasks will not start. 39 | 40 | ### Suggested Solution(s) 41 | * If using the provided image setup with no changes, check that the path specified for the AWS CLI in your `nextflow.config` is set to 42 | ``` 43 | aws.batch.cliPath = '/opt/aws-cli/bin/aws' 44 | ``` 45 | 46 | * Check that the target S3 bucket created in the set-up has the following path: `bucket-name/-ecs-additions/SourceStag/ ` and that content is present. 47 | * This location should contain a zip file that has the following in it: 48 | 49 | ``` 50 | . 51 | ├── awscli-shim.sh 52 | ├── ecs-additions-common.sh 53 | ├── ecs-additions-cromwell.sh 54 | ├── ecs-additions-nextflow.sh 55 | ├── ecs-additions-step-functions.sh 56 | ├── ecs-logs-collector.sh 57 | ├── fetch_and_run.sh 58 | ├── get-amazon-ebs-autoscale.sh 59 | └── provision.sh 60 | ``` 61 | * If this is missing, check that `-ecs-additions` exists and ran successfully in AWS CodePipeline, and rerun it if failures are present.
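As a quick check of the last two points, you can confirm the CLI path in your config and list the artifacts that worker nodes download at startup. This is a sketch; the bucket name is a placeholder for the bucket created by your deployment.

```bash
# Confirm the cliPath setting in your local nextflow.config
grep -n "cliPath" nextflow.config

# List the ecs-additions artifacts in the deployment bucket
# (replace YOUR-BUCKET-NAME with the bucket created during set-up)
aws s3 ls s3://YOUR-BUCKET-NAME/ --recursive | grep ecs-additions
```

If nothing is listed, re-run the failed stage in AWS CodePipeline as described above.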
62 | -------------------------------------------------------------------------------- /docs/orchestration/orchestration-intro.md: -------------------------------------------------------------------------------- 1 | # Workflow Orchestration 2 | 3 | {{ deprecation_notice() }} 4 | 5 | Having deployed a way to execute individual tasks via AWS Batch, we turn to 6 | orchestration of complete workflows. 7 | 8 | In order to process data, we will need to handle the cases for serial and parallel task execution, and retry logic when a task fails. 9 | 10 | The logic for workflows should live outside of the code for any individual task. There are a couple of options that researchers can use to define and execute repeatable data analysis pipelines on AWS Batch: 11 | 12 | 1. [AWS Step Functions](./step-functions/step-functions-overview.md), a native AWS service for workflow orchestration. 13 | 14 | 2. 3rd party alternatives: 15 | 16 | * [Cromwell](./cromwell/cromwell-overview.md), a workflow execution system 17 | from the [Broad Institute](https://www.broadinstitute.org/) 18 | 19 | * [Nextflow](./nextflow/nextflow-overview.md), a reactive workflow framework and domain specific language (DSL) from the [Comparative Bioinformatics group](https://www.crg.eu/en/programmes-groups/notredame-lab) at the Barcelona [Centre for Genomic Regulation (CRG)](http://www.crg.eu/) 20 | 21 | !!! help 22 | There are many more 3rd party alternatives. We are actively seeking out 23 | help to document them here! 24 | -------------------------------------------------------------------------------- /docs/orchestration/step-functions/files/example-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment":"A simple example that submits a Job to AWS Batch", 3 | "StartAt":"RunIsaacJob", 4 | "States":{ 5 | "RunIsaacJob":{ 6 | "Type":"Task", 7 | "Resource":"arn:aws:states:::batch:submitJob.sync", 8 | "Parameters":{ 9 | "JobDefinition":"Isaac", 10 | "JobName.$":"$.isaac.JobName", 11 | "JobQueue":"HighPriority", 12 | "Parameters.$": "$.isaac" 13 | }, 14 | "TimeoutSeconds": 1800, 15 | "HeartbeatSeconds": 60, 16 | "Next":"Parallel", 17 | "InputPath":"$", 18 | "ResultPath":"$.status", 19 | "Retry" : [ 20 | { 21 | "ErrorEquals": [ "States.Timeout" ], 22 | "IntervalSeconds": 3, 23 | "MaxAttempts": 2, 24 | "BackoffRate": 1.5 25 | } 26 | ] 27 | }, 28 | "Parallel":{ 29 | "Type":"Parallel", 30 | "Next":"FinalState", 31 | "Branches":[ 32 | { 33 | "StartAt":"RunStrelkaJob", 34 | "States":{ 35 | "RunStrelkaJob":{ 36 | "Type":"Task", 37 | "Resource":"arn:aws:states:::batch:submitJob.sync", 38 | "Parameters":{ 39 | "JobDefinition":"Strelka", 40 | "JobName.$":"$.strelka.JobName", 41 | "JobQueue":"HighPriority", 42 | "Parameters.$": "$.strelka" 43 | }, 44 | "TimeoutSeconds": 1800, 45 | "HeartbeatSeconds": 60, 46 | "Next":"RunSnpEffJob", 47 | "InputPath":"$", 48 | "ResultPath":"$.status", 49 | "Retry" : [ 50 | { 51 | "ErrorEquals": [ "States.Timeout" ], 52 | "IntervalSeconds": 3, 53 | "MaxAttempts": 2, 54 | "BackoffRate": 1.5 55 | } 56 | ] 57 | }, 58 | "RunSnpEffJob":{ 59 | "Type":"Task", 60 | "Resource":"arn:aws:states:::batch:submitJob.sync", 61 | "Parameters":{ 62 | "JobDefinition":"SNPEff", 63 | "JobName.$":"$.snpeff.JobName", 64 | "JobQueue":"HighPriority", 65 | "Parameters.$": "$.snpeff" 66 | }, 67 | "TimeoutSeconds": 1800, 68 | "HeartbeatSeconds": 60, 69 | "Retry" : [ 70 | { 71 | "ErrorEquals": [ "States.Timeout" ], 72 | "IntervalSeconds": 3, 73 | "MaxAttempts": 2, 74 | "BackoffRate": 
1.5 75 | } 76 | ], 77 | "End":true 78 | } 79 | } 80 | }, 81 | { 82 | "StartAt":"RunSamtoolsStatsJob", 83 | "States":{ 84 | "RunSamtoolsStatsJob":{ 85 | "Type":"Task", 86 | "Resource":"arn:aws:states:::batch:submitJob.sync", 87 | "Parameters":{ 88 | "JobDefinition":"SamtoolsStats", 89 | "JobName.$":"$.samtools.JobName", 90 | "JobQueue":"HighPriority", 91 | "Parameters.$": "$.samtools" 92 | }, 93 | "TimeoutSeconds": 1800, 94 | "HeartbeatSeconds": 60, 95 | "End":true, 96 | "Retry" : [ 97 | { 98 | "ErrorEquals": [ "States.Timeout" ], 99 | "IntervalSeconds": 3, 100 | "MaxAttempts": 2, 101 | "BackoffRate": 1.5 102 | } 103 | ] 104 | } 105 | } 106 | } 107 | ] 108 | }, 109 | "FinalState":{ 110 | "Type":"Pass", 111 | "End":true 112 | } 113 | } 114 | } -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/aws-sfn-genomics-workflow-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/aws-sfn-genomics-workflow-arch.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/cfn-stack-outputs-statemachineinput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/cfn-stack-outputs-statemachineinput.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/cfn-stack-outputs-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/cfn-stack-outputs-tab.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/example-state-machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/example-state-machine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-batch-job-snippet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-batch-job-snippet.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-execution-inprogress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-execution-inprogress.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-start-execution-dialog.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-start-execution-dialog.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-start-execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-start-execution.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-statemachine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-statemachine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-example-mapping-state-machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-example-mapping-state-machine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/step-functions-structures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/step-functions-structures.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/step-functions-examples.md: -------------------------------------------------------------------------------- 1 | # Step Functions Workflow Examples 2 | 3 | THIS IS A STUB 4 | 5 | ![Example Workflow](./images/example-state-machine.png) 6 | 7 | this was created from [this file](./files/example-state-machine.json). 
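One way to exercise the deployed state machine is to start an execution from the AWS CLI. The commands below are illustrative only: the state machine ARN is a placeholder, and the input keys simply mirror the `isaac`, `strelka`, `snpeff`, and `samtools` parameters referenced by the example definition linked above.

```bash
# Start an execution of the example state machine (ARN is a placeholder)
aws stepfunctions start-execution \
    --state-machine-arn arn:aws:states:us-east-1:111122223333:stateMachine:example-genomics-workflow \
    --input '{
        "isaac":    {"JobName": "isaac-sample1"},
        "strelka":  {"JobName": "strelka-sample1"},
        "snpeff":   {"JobName": "snpeff-sample1"},
        "samtools": {"JobName": "samtools-sample1"}
    }'

# Check on the execution using the executionArn returned above
aws stepfunctions describe-execution --execution-arn <executionArn>
```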
-------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: mkdocs 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.6.6 6 | - pip 7 | - pip: 8 | - cfn-lint 9 | - fontawesome-markdown==0.2.6 10 | - mkdocs==1.0.4 11 | - mkdocs-macros-plugin==0.2.4 12 | - mkdocs-markdownextradata-plugin==0.0.5 13 | - mkdocs-material==3.1.0 14 | - pymdown-extensions==6.0 15 | prefix: /Users/pwyming/anaconda3/envs/mkdocs 16 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | defines macros for documents using mkdocs-macros-plugin 3 | """ 4 | 5 | from textwrap import dedent 6 | from functools import wraps 7 | 8 | def dedented(f): 9 | @wraps(f) 10 | def wrapper(*args, **kwargs): 11 | return dedent(f(*args, **kwargs).strip()) 12 | return wrapper 13 | 14 | def declare_variables(variables, macro): 15 | 16 | _artifacts = variables['artifacts'] 17 | 18 | @macro 19 | @dedented 20 | def cfn_button(name, template, enabled=True): 21 | """ 22 | create an cloudformation launch button 23 | """ 24 | s3 = _artifacts['s3'] 25 | 26 | if template.lower().startswith('http'): 27 | template_url = template 28 | else: 29 | s3['object'] = "/".join( 30 | filter(None, [s3.get('prefix'), 'latest', 'templates', template]) 31 | ) 32 | 33 | template_url = "https://{bucket}.s3.amazonaws.com/{object}".format(**s3) 34 | 35 | cfn_url = "".join([ 36 | "https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=", 37 | name, 38 | "&templateURL=", 39 | template_url, 40 | ]) 41 | 42 | img_src = "/" + "/".join( 43 | filter(None, [s3.get('prefix'), 'images/cloudformation-launch-stack.png']) 44 | ) 45 | 46 | html = 'play_arrow' 47 | if not enabled: 48 | html = 'play_arrow' 49 | 50 | return html.format(name=name, img=img_src, url=cfn_url) 51 | 52 | @macro 53 | @dedented 54 | def download_button(path, icon="cloud_download"): 55 | """ 56 | create a download button 57 | """ 58 | repo_url = variables['repo_url'] 59 | s3 = _artifacts['s3'] 60 | 61 | if path.lower().startswith('http'): 62 | src_url = path 63 | else: 64 | # s3['object'] = "/".join( 65 | # filter(None, [s3.get('prefix'), path]) 66 | # ) 67 | 68 | # src_url = "https://s3.amazonaws.com/{bucket}/{object}".format(**s3) 69 | if repo_url.endswith("/"): 70 | repo_url = repo_url[:-1] 71 | 72 | if path.startswith("/"): 73 | path = path[1:] 74 | 75 | src_url = f"{repo_url}/blob/master/src/{path}" 76 | 77 | return """ 78 | {icon} 79 | """.format(icon=icon, url=src_url) 80 | 81 | @macro 82 | @dedented 83 | def cfn_stack_row(name, stack_name, template, description, enable_cfn_button=True): 84 | if template.lower().startswith('http'): 85 | stack_url = template 86 | else: 87 | stack_url = "templates/" + template 88 | 89 | return """ 90 | | {name} | {description} | {download_button} | {cfn_button} | 91 | """.format( 92 | name=name, 93 | stack_name=stack_name, 94 | download_button=download_button(stack_url), 95 | cfn_button=cfn_button(stack_name, template, enabled=enable_cfn_button), 96 | description=description 97 | ) 98 | 99 | @macro 100 | @dedented 101 | def deprecation_notice(): 102 | return """ 103 | !!! error "DEPRECATION NOTICE" 104 | This site and related code are no longer actively maintained. 105 | 106 | This site will be disabled and the underlying Github repository will be **archived on 2023-07-31**. 
This allows all code and assets presented here to remain publicly available for historical reference purposes only. 107 | 108 | For more up to date solutions to running Genomics workflows on AWS checkout: 109 | 110 | - [Amazon Omics](https://aws.amazon.com/omics/) - a fully managed service for storing, processing, and querying genomic, transcriptomic, and other omics data into insights. [Omics Workflows](https://docs.aws.amazon.com/omics/latest/dev/workflows.html) provides fully managed execution of pre-packaged [Ready2Run](https://docs.aws.amazon.com/omics/latest/dev/service-workflows.html) workflows or private workflows you create using WDL or Nextflow. 111 | - [Amazon Genomics CLI](https://aws.amazon.com/genomics-cli/) - an open source tool that automates deploying and running workflow engines in AWS. AGC uses the same architectural patterns described here (i.e. operating workflow engines with AWS Batch). It provides support for running WDL, Nextflow, Snakemake, and CWL based workflows. 112 | """ -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Genomics Workflows on AWS 2 | nav: 3 | - Overview: index.md 4 | - Disclaimer: disclaimer.md 5 | - Quick Start: quick-start.md 6 | - Core Environment: 7 | - Introduction: core-env/introduction.md 8 | - Data Storage: core-env/create-s3-bucket.md 9 | - Permissions: core-env/create-iam-roles.md 10 | - Compute Resources: core-env/create-custom-compute-resources.md 11 | - AWS Batch: core-env/setup-aws-batch.md 12 | - Customized Deployment: core-env/custom-deploy.md 13 | - Building a Custom Distribution: core-env/build-custom-distribution.md 14 | # - Containerized Tooling: 15 | # - Introduction: containers/container-introduction.md 16 | # - Examples: containers/container-examples.md 17 | - Workflow Orchestration: 18 | - Introduction: orchestration/orchestration-intro.md 19 | - AWS Step Functions: 20 | - Overview: orchestration/step-functions/step-functions-overview.md 21 | # - Examples: orchestration/step-functions/step-functions-examples.md 22 | - Cromwell: 23 | - Overview: orchestration/cromwell/cromwell-overview.md 24 | - Examples: orchestration/cromwell/cromwell-examples.md 25 | - Trouble Shooting: orchestration/cromwell/cromwell-trouble-shooting.md 26 | - Nextflow: 27 | - Overview: orchestration/nextflow/nextflow-overview.md 28 | # - Examples: orchestration/nextflow/nextflow-examples.md 29 | - Trouble Shooting: orchestration/nextflow/nextflow-trouble-shooting.md 30 | - Cost Effective Workflows: orchestration/cost-effective-workflows/cost-effective-workflows.md 31 | 32 | extra_css: [extra.css] 33 | theme: 34 | name: material 35 | logo: 'images/AWS_logo_RGB_REV.svg' 36 | palette: 37 | primary: blue grey 38 | accent: deep orange 39 | markdown_extensions: 40 | - admonition 41 | - pymdownx.emoji: 42 | emoji_generator: !!python/name:pymdownx.emoji.to_png 43 | 44 | repo_url: &repo_url https://github.com/aws-samples/aws-genomics-workflows/ 45 | repo_name: Contribute 46 | copyright: 2019 Amazon Web Services 47 | 48 | plugins: 49 | - search 50 | - macros 51 | 52 | extra: 53 | repo_url: *repo_url 54 | artifacts: 55 | s3: 56 | bucket: aws-genomics-workflows 57 | site: 58 | s3: 59 | bucket: docs.opendata.aws 60 | prefix: genomics-workflows 61 | 62 | use_directory_urls: false -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | mkdocs==1.3.0 2 | mkdocs-macros-plugin==0.2.4 3 | mkdocs-markdownextradata-plugin==0.0.5 4 | mkdocs-material==3.1.0 5 | pymdown-extensions==10.0 6 | jinja2==3.0.0 # https://github.com/mkdocs/mkdocs/issues/2799 7 | cfn-lint 8 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.pem 2 | _ignore 3 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | cdk.context.json -------------------------------------------------------------------------------- /src/aws-genomics-cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/app.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "accountID": "111111111111", 3 | "region": "us-west-2", 4 | "projectName": "genomics", 5 | "tags": [{ 6 | "name": "Environment", 7 | "value": "production" 8 | }, 9 | { 10 | "name": "Project", 11 | "value": "genomics-pipeline" 12 | } 13 | ], 14 | "S3": { 15 | "existingBucket": true, 16 | "bucketName": "YOUR-BUCKET-NAME" 17 | }, 18 | "VPC": { 19 | "createVPC": true, 20 | "VPCName": "genomics-vpc", 21 | "maxAZs": 2, 22 | "cidr": "10.0.0.0/16", 23 | "cidrMask": 24 24 | }, 25 | "batch": { 26 | "defaultVolumeSize": 100, 27 | "spotMaxVCPUs": 128, 28 | "onDemendMaxVCPUs": 128, 29 | "instanceTypes": [ 30 | "c4.large", 31 | "c4.xlarge", 32 | "c4.2xlarge", 33 | "c4.4xlarge", 34 | "c4.8xlarge", 35 | "c5.large", 36 | "c5.xlarge", 37 | "c5.2xlarge", 38 | "c5.4xlarge", 39 | "c5.9xlarge", 40 | "c5.12xlarge", 41 | "c5.18xlarge", 42 | "c5.24xlarge" 43 | ] 44 | }, 45 | "workflows": [{ 46 | "name": "variantCalling", 47 | "spot": true 48 | }] 49 | } 50 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/assets/genomics-policy-s3.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Deny", 6 | "Action": [ 7 | "s3:Delete*", 8 | "s3:PutBucket*" 9 | ], 10 | "Resource": [ 11 | "arn:aws:s3:::BUCKET_NAME" 12 | ] 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": [ 17 | "s3:ListBucket*" 18 | ], 19 | "Resource": [ 20 | "arn:aws:s3:::BUCKET_NAME" 21 | ] 22 | }, 23 | { 24 | "Effect": "Allow", 25 | "Action": [ 26 | "s3:*" 27 | ], 28 | "Resource": [ 29 | "arn:aws:s3:::BUCKET_NAME/*" 30 | ] 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/assets/launch_template_user_data.txt: -------------------------------------------------------------------------------- 1 | MIME-Version: 1.0 2 | Content-Type: multipart/mixed; boundary="==BOUNDARY==" 3 | 4 | --==BOUNDARY== 5 | Content-Type: text/cloud-config; charset="us-ascii" 6 | 7 | #cloud-config 8 | repo_update: true 9 | repo_upgrade: security 10 | 11 | packages: 12 | - jq 13 | - btrfs-progs 14 | - sed 15 | - git 16 | - 
amazon-ssm-agent 17 | - unzip 18 | - amazon-cloudwatch-agent 19 | 20 | write_files: 21 | - permissions: '0644' 22 | path: /opt/aws/amazon-cloudwatch-agent/etc/config.json 23 | content: | 24 | { 25 | "agent": { 26 | "logfile": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log" 27 | }, 28 | "logs": { 29 | "logs_collected": { 30 | "files": { 31 | "collect_list": [ 32 | { 33 | "file_path": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log", 34 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 35 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/amazon-cloudwatch-agent.log" 36 | }, 37 | { 38 | "file_path": "/var/log/cloud-init.log", 39 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 40 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init.log" 41 | }, 42 | { 43 | "file_path": "/var/log/cloud-init-output.log", 44 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 45 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init-output.log" 46 | }, 47 | { 48 | "file_path": "/var/log/ecs/ecs-init.log", 49 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 50 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-init.log" 51 | }, 52 | { 53 | "file_path": "/var/log/ecs/ecs-agent.log", 54 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 55 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-agent.log" 56 | }, 57 | { 58 | "file_path": "/var/log/ecs/ecs-volume-plugin.log", 59 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 60 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-volume-plugin.log" 61 | } 62 | ] 63 | } 64 | } 65 | } 66 | } 67 | 68 | runcmd: 69 | 70 | # start the amazon-cloudwatch-agent 71 | - /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/etc/config.json 72 | 73 | # install aws-cli v2 and copy the static binary in an easy to find location for bind-mounts into containers 74 | - curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip" 75 | - unzip -q /tmp/awscliv2.zip -d /tmp 76 | - /tmp/aws/install -b /usr/bin 77 | 78 | # check that the aws-cli was actually installed. if not shutdown (terminate) the instance 79 | - command -v aws || shutdown -P now 80 | 81 | - mkdir -p /opt/aws-cli/bin 82 | - cp -a $(dirname $(find /usr/local/aws-cli -name 'aws' -type f))/. 
/opt/aws-cli/bin/ 83 | 84 | # set environment variables for provisioning 85 | - export GWFCORE_NAMESPACE=${Namespace} 86 | - export INSTALLED_ARTIFACTS_S3_ROOT_URL=$(aws ssm get-parameter --name /gwfcore/${Namespace}/installed-artifacts/s3-root-url --query 'Parameter.Value' --output text) 87 | 88 | # enable ecs spot instance draining 89 | - echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config 90 | 91 | # pull docker images only if missing 92 | - echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config 93 | 94 | - cd /opt 95 | - aws s3 sync $INSTALLED_ARTIFACTS_S3_ROOT_URL/ecs-additions ./ecs-additions 96 | - chmod a+x /opt/ecs-additions/provision.sh 97 | - /opt/ecs-additions/provision.sh 98 | 99 | --==BOUNDARY==-- -------------------------------------------------------------------------------- /src/aws-genomics-cdk/bin/aws-genomics-cdk.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import "source-map-support/register"; 3 | import * as cdk from "@aws-cdk/core"; 4 | import { AwsGenomicsCdkStack } from "../lib/aws-genomics-cdk-stack"; 5 | import * as config from "../app.config.json"; 6 | 7 | const env = { 8 | account: process.env.CDK_DEFAULT_ACCOUNT ?? config.accountID, 9 | region: process.env.CDK_DEFAULT_REGION ?? config.region, 10 | }; 11 | 12 | const app = new cdk.App(); 13 | const genomicsStack = new AwsGenomicsCdkStack( 14 | app, 15 | `${config.projectName}CdkStack`, 16 | { 17 | env: env, 18 | } 19 | ); 20 | 21 | for (let i = 0; i < config.tags.length; i++) { 22 | cdk.Tags.of(genomicsStack).add(config.tags[i].name, config.tags[i].value); 23 | } 24 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/aws-genomics-cdk.ts", 3 | "context": { 4 | "@aws-cdk/core:enableStackNameDuplicates": "true", 5 | "aws-cdk:enableDiffNoFail": "true", 6 | "@aws-cdk/core:stackRelativeExports": "true", 7 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, 8 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, 9 | "@aws-cdk/aws-kms:defaultKeyPolicies": true, 10 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, 11 | "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | DEFAULT_PROJECT_NAME="genomics" 5 | IMAGE_NAME=$1 6 | PROJECT_NAME="${2:-$DEFAULT_PROJECT_NAME}" 7 | DOCKER_FILE_PATH="./${IMAGE_NAME}/Dockerfile" 8 | REGISTRY="$CDK_DEFAULT_ACCOUNT.dkr.ecr.$CDK_DEFAULT_REGION.amazonaws.com" 9 | REPOSITORY_NAME="${PROJECT_NAME}/${IMAGE_NAME}" 10 | IMAGE_TAG=":latest" 11 | IMAGE_WITH_TAG="${IMAGE_NAME}${IMAGE_TAG}" 12 | REGISTRY_PATH="${REGISTRY}/${REPOSITORY_NAME}" 13 | REGISTRY_PATH_WITH_TAG="${REGISTRY}/${PROJECT_NAME}/${IMAGE_WITH_TAG}" 14 | 15 | 16 | if [ -z "${IMAGE_NAME}" ] 17 | then 18 | echo "Missing image name parameter." 19 | exit 1 20 | fi 21 | 22 | if [[ ! -f "${DOCKER_FILE_PATH}" ]] 23 | then 24 | echo "${DOCKER_FILE_PATH} does not exist on the filesystem." 25 | exit 1 26 | fi 27 | 28 | if [ -z "$CDK_DEFAULT_ACCOUNT" ] 29 | then 30 | echo "Missing CDK_DEFAULT_ACCOUNT environment variable." 
31 | exit 1 32 | fi 33 | 34 | if [ -z "$CDK_DEFAULT_REGION" ] 35 | then 36 | echo "Missing CDK_DEFAULT_REGION environment variable." 37 | exit 1 38 | fi 39 | 40 | 41 | echo "Docker Login to ECR" 42 | eval $(aws ecr get-login --no-include-email --region ${CDK_DEFAULT_REGION}) 43 | 44 | 45 | # Check if the repository exists in ECR and if not, create it 46 | REPO=`aws ecr describe-repositories | grep -o ${REGISTRY_PATH}` || true 47 | if [ "${REPO}" != "${REGISTRY_PATH}" ] 48 | then 49 | aws ecr create-repository --repository-name ${REPOSITORY_NAME} 50 | fi 51 | 52 | # build the base image 53 | docker build \ 54 | -t ${IMAGE_NAME} \ 55 | -f ${DOCKER_FILE_PATH} . 56 | 57 | # build the image with an AWS specific entrypoint 58 | docker build \ 59 | --build-arg BASE_IMAGE=${IMAGE_NAME} \ 60 | -t ${IMAGE_WITH_TAG} \ 61 | -f ./entry.dockerfile . 62 | 63 | 64 | # tag the image 65 | docker tag ${IMAGE_WITH_TAG} ${REGISTRY_PATH} 66 | 67 | 68 | # push the image to the registry 69 | docker push ${REGISTRY_PATH_WITH_TAG} -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/bwa/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG BWA_VERSION=0.7.17 4 | 5 | RUN apt-get update -y \ 6 | && apt-get install -y \ 7 | wget \ 8 | make \ 9 | gcc \ 10 | zlib1g-dev \ 11 | bzip2 12 | 13 | 14 | WORKDIR /opt/src 15 | RUN wget https://github.com/lh3/bwa/releases/download/v${BWA_VERSION}/bwa-${BWA_VERSION}.tar.bz2 \ 16 | && tar -xjvf bwa-*.tar.bz2 \ 17 | && cd bwa-* \ 18 | && make \ 19 | && cp bwa /opt/src 20 | 21 | 22 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 23 | 24 | RUN apt-get update -y \ 25 | && apt-get install -y \ 26 | wget \ 27 | make \ 28 | zlib1g \ 29 | bzip2 \ 30 | && apt-get clean 31 | 32 | WORKDIR /opt/bin 33 | COPY --from=build /opt/src/bwa . 
34 | 35 | ENV PATH=/opt/bin:$PATH 36 | 37 | WORKDIR /scratch 38 | 39 | ENTRYPOINT ["bwa"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/entry.dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | FROM ${BASE_IMAGE}:latest 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y gettext-base wget 6 | RUN apt-get clean 7 | 8 | ENV PATH=/opt/bin:$PATH 9 | 10 | COPY entrypoint.sh /opt/bin/entrypoint.sh 11 | RUN chmod +x /opt/bin/entrypoint.sh 12 | 13 | WORKDIR /scratch 14 | 15 | ENTRYPOINT ["entrypoint.sh"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/fastqc/Dockerfile: -------------------------------------------------------------------------------- 1 | # base image 2 | FROM ubuntu:xenial 3 | 4 | # metadata 5 | LABEL base.image="ubuntu:xenial" 6 | LABEL version="1" 7 | LABEL software="FASTQC" 8 | LABEL software.version="0.11.8" 9 | LABEL description="A quality control analysis tool for high throughput sequencing data" 10 | LABEL website="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" 11 | LABEL license="https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt" 12 | LABEL maintainer="Abigail Shockey" 13 | LABEL maintainer.email="abigail.shockey@slh.wisc.edu" 14 | 15 | RUN apt-get update && apt-get install -y \ 16 | unzip \ 17 | wget \ 18 | perl \ 19 | default-jre \ 20 | && apt-get clean && apt-get autoclean && rm -rf /var/lib/apt/lists/* 21 | 22 | RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip && \ 23 | unzip fastqc_v0.11.8.zip && \ 24 | rm fastqc_v0.11.8.zip && \ 25 | chmod +x FastQC/fastqc 26 | 27 | 28 | ENV PATH="${PATH}:/FastQC/" 29 | 30 | RUN mkdir /data 31 | WORKDIR /data 32 | 33 | ENTRYPOINT ["fastqc"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/gatk/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM broadinstitute/gatk:4.1.3.0 2 | 3 | ENTRYPOINT ["gatk"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/minimap2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 AS build 2 | 3 | ARG VERSION=2.17 4 | 5 | # metadata 6 | LABEL base.image="ubuntu:18.04" 7 | LABEL container.version="1" 8 | LABEL software="Minimap2" 9 | LABEL software.version="${VERSION}" 10 | LABEL description="versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database" 11 | LABEL website="https://github.com/lh3/minimap2" 12 | LABEL license="https://github.com/lh3/minimap2/blob/master/LICENSE.txt" 13 | LABEL maintainer="Kelsey Florek" 14 | LABEL maintainer.email="Kelsey.florek@slh.wisc.edu" 15 | 16 | # install dependeny tools 17 | RUN apt-get update && apt-get install -y python curl bzip2 && apt-get clean 18 | 19 | # download and extract minimap2 20 | WORKDIR /opt/bin 21 | RUN curl -L https://github.com/lh3/minimap2/releases/download/v2.17/minimap2-2.17_x64-linux.tar.bz2 | tar -jxvf - 22 | 23 | # add minimap2 to the path 24 | ENV PATH="${PATH}:/opt/bin/minimap2-2.17_x64-linux" 25 | 26 | WORKDIR /scratch 27 | 28 | ENTRYPOINT ["minimap2"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/picard/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM broadinstitute/picard 2 | 3 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG VERSION=1.9 4 | 5 | # Metadata 6 | LABEL container.base.image="ubuntu:18.04" 7 | LABEL software.name="SAMtools" 8 | LABEL software.version=${VERSION} 9 | LABEL software.description="Utilities for the Sequence Alignment/Map (SAM/BAM/CRAM) formats" 10 | LABEL software.website="http://www.htslib.org" 11 | LABEL software.documentation="http://www.htslib.org/doc/samtools.html" 12 | LABEL software.license="MIT/Expat" 13 | LABEL tags="Genomics" 14 | 15 | # System and library dependencies 16 | RUN apt-get -y update && \ 17 | apt-get -y install \ 18 | autoconf \ 19 | automake \ 20 | make \ 21 | gcc \ 22 | perl \ 23 | zlib1g-dev \ 24 | libbz2-dev \ 25 | liblzma-dev \ 26 | libcurl4-gnutls-dev \ 27 | libssl-dev \ 28 | libncurses5-dev \ 29 | wget && \ 30 | apt-get clean 31 | 32 | # Application installation 33 | RUN wget -O /samtools-${VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${VERSION}/samtools-${VERSION}.tar.bz2 && \ 35 | tar xvjf /samtools-${VERSION}.tar.bz2 && rm /samtools-${VERSION}.tar.bz2 36 | 37 | WORKDIR /samtools-${VERSION} 38 | RUN ./configure && make 39 | 40 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 41 | COPY --from=build /samtools-*/samtools /usr/local/bin 42 | 43 | RUN apt-get -y update && \ 44 | apt-get -y install \ 45 | libcurl3-gnutls && \ 46 | apt-get clean 47 | 48 | ENTRYPOINT ["samtools"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/README.md: -------------------------------------------------------------------------------- 1 | # Bioinformatics tools examples 2 | 3 | After [deploying the CDK genomics pipeline project](GITHUB URL) you can test 4 | the genomics tools directly with AWS Batch or start a Step Functions pipeline. 5 | 6 | 7 | ### Testing bioinformatics tools using AWS Batch 8 | Create a file named batch-TOOL_NAME.json. 9 | ``` 10 | { 11 | "jobName": "", 12 | "jobQueue": "", 13 | "jobDefinition": "", 14 | "containerOverrides": { 15 | "vcpus": 1, 16 | "memory": 1000, 17 | "command": [""], 18 | "environment": [{ 19 | "name": "JOB_INPUTS", 20 | "value": "" 21 | }, 22 | { 23 | "name": "JOB_OUTPUTS", 24 | "value": "" 25 | }, 26 | { 27 | "name": "JOB_OUTPUT_PREFIX", 28 | "value": "" 29 | } 30 | ] 31 | } 32 | } 33 | 34 | ``` 35 | 36 | **jobName** (string) 37 | The name of the job. The first character must be alphanumeric, and up to 128 38 | letters (uppercase and lowercase), numbers, hyphens, and underscores are 39 | allowed. 40 | 41 | **jobQueue** (string) 42 | The [job queue](https://docs.aws.amazon.com/batch/latest/userguide/job_queues.html) 43 | into which the job is submitted. You can specify either the name or the Amazon 44 | Resource Name (ARN) of the queue. 45 | 46 | **jobDefinition** (string) 47 | The [job definition](https://docs.aws.amazon.com/batch/latest/userguide/job_definitions.html) 48 | used by this job. This value can be one of name, name:revision, or the Amazon 49 | Resource Name (ARN) for the job definition. If name is specified without 50 | a revision, then the latest active revision is used.
51 | 52 | **containerOverrides.vcpus** (integer, optional) 53 | The number of vCPUs to reserve for the container. This value overrides the 54 | value set in the job definition. 55 | 56 | **containerOverrides.memory** (integer, optional) 57 | The number of MiB of memory reserved for the job. This value overrides the 58 | value set in the job definition. 59 | 60 | **containerOverrides.command** (list) 61 | The command to send to the container, which overrides the default command from 62 | the Docker image or the job definition. 63 | 64 | **containerOverrides.environment** (list) 65 | The environment variables to send to the container. You can add new environment 66 | variables, which are added to the container at launch, or you can override the 67 | existing environment variables from the Docker image or the job definition. 68 | (structure) 69 | A key-value pair object. 70 | **name** (string) 71 | The name of the key-value pair. For environment variables, this is the name of 72 | the environment variable. 73 | **value** (string) 74 | The value of the key-value pair. For environment variables, this is the value 75 | of the environment variable. 76 | 77 | Example of a `batch-fastqc.json`: 78 | ``` 79 | { 80 | "jobName": "fastqc", 81 | "jobQueue": "genomics-default-queue", 82 | "jobDefinition": "genomics-fastqc:1", 83 | "containerOverrides": { 84 | "vcpus": 1, 85 | "memory": 1000, 86 | "command": ["fastqc *.gz"], 87 | "environment": [{ 88 | "name": "JOB_INPUTS", 89 | "value": "s3://aws-batch-genomics-shared/secondary-analysis/example-files/fastq/NIST7035_R*.fastq.gz" 90 | }, 91 | { 92 | "name": "JOB_OUTPUTS", 93 | "value": "*.html *.zip" 94 | }, 95 | { 96 | "name": "JOB_OUTPUT_PREFIX", 97 | "value": "s3://my-genomics-bucket-name/some-folder-name" 98 | } 99 | ] 100 | } 101 | } 102 | 103 | ``` 104 | In this example we are running the FastQC tool, which takes fastq files and 105 | generates a report. It will output zip and html files, which we will save to an 106 | S3 bucket. 107 | **jobName** - "fastqc". A name that describes the job to be run. 108 | **jobQueue** - "genomics-default-queue". A valid name of a job queue. This 109 | can be found in the AWS web console > Batch > Job queues. 110 | **jobDefinition** - "genomics-fastqc:1". A valid and active job definition and 111 | its version. This can be found in the AWS web console > Batch > Job 112 | definitions. 113 | **containerOverrides.vcpus** - 1. Request a machine that has at least 1 core. 114 | **containerOverrides.memory** - 1000. Request a machine that has at least 115 | 1000 MiB of RAM. 116 | **containerOverrides.command** - ["fastqc *.gz"]. Run the fastqc command on all 117 | the .gz files in the working directory. 118 | **containerOverrides.environment** - A list of key-value pairs. 119 | 120 | **name**: JOB_INPUTS. 121 | **value**: fastq files from a source S3 bucket. 122 | 123 | **name**: JOB_OUTPUTS. 124 | **value**: "*.html *.zip". Copy all html and zip files from the local working directory 125 | to an S3 bucket. 126 | 127 | **name**: JOB_OUTPUT_PREFIX. 128 | **value**: An S3 bucket and a prefix (folder) to copy the output files into. 129 | 130 | 131 | There are several examples under the `examples` directory. To run an example, 132 | edit the example file you want to run (e.g., `examples/batch-fastqc-job.json`), 133 | update the `JOB_INPUTS` to a valid source of your sample fastq files, or leave 134 | the default value to use a demo sample.
Update the `JOB_OUTPUT_PREFIX` to a 135 | valid S3 bucket and a subfolder where you want the output zip and html files 136 | to be saved. 137 | 138 | Change directory to the examples directory and then submit the job to Batch. 139 | 140 | ``` 141 | cd examples 142 | aws batch submit-job --cli-input-json file://batch-fastqc-job.json 143 | ``` 144 | 145 | Navigate to the Batch jobs page (AWS console -> AWS Batch -> Jobs -> select the 146 | job queue you used, e.g., `genomics-default-queue`) to track the progress of 147 | the job. You can click on the job name and then click on the Log stream name 148 | link to track the stdout of the running task. 149 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-bwa-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "bwa", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "bwa:1", 5 | "containerOverrides": { 6 | "command": ["bwa mem -t 8 -p -o ${SAMPLE_ID}.sam ${REFERENCE_NAME}.fasta ${SAMPLE_ID}_1*.fastq.gz"], 7 | "memory": 32000, 8 | "environment": [{ 9 | "name": "JOB_INPUTS", 10 | "value": "s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_*.fastq.gz s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta*" 11 | }, 12 | { 13 | "name": "SAMPLE_ID", 14 | "value": "SRR014820" 15 | }, 16 | { 17 | "name": "REFERENCE_NAME", 18 | "value": "Homo_sapiens_assembly38" 19 | }, 20 | { 21 | "name": "JOB_OUTPUTS", 22 | "value": "*.sam" 23 | }, 24 | { 25 | "name": "JOB_OUTPUT_PREFIX", 26 | "value": "s3://YOUR-BUCKET-NAME/output" 27 | } 28 | ] 29 | } 30 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-fastqc-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "fastqc", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "fastqc:1", 5 | "containerOverrides": { 6 | "command": ["fastqc *.gz"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_*.fastq.gz" 10 | }, 11 | { 12 | "name": "JOB_OUTPUTS", 13 | "value": "*.html *.zip" 14 | }, 15 | { 16 | "name": "JOB_OUTPUT_PREFIX", 17 | "value": "s3://YOUR-BUCKET-NAME/output" 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-gatk-dictionary.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "gatk-create-dictionary", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "gatkCreateSequenceDictionary:1", 5 | "containerOverrides": { 6 | "command": ["java -jar /usr/app/picard.jar CreateSequenceDictionary R=Homo_sapiens_assembly38.fasta O=Homo_sapiens_assembly38.dict"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta" 10 | }, 11 | { 12 | "name": "JOB_OUTPUTS", 13 | "value": "*.dict" 14 | }, 15 | { 16 | "name": "JOB_OUTPUT_PREFIX", 17 | "value": "s3://YOUR-BUCKET-NAME/ref" 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-gatk-htc.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "gatkHaploTypeCaller", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition":
"gatkHaplotypeCaller:1", 5 | "containerOverrides": { 6 | "command": ["gatk --java-options \"-Xmx4g\" HaplotypeCaller -R ${REFERENCE_NAME}.fasta -I ${SAMPLE_ID}.bam -O ${SAMPLE_ID}.vcf.gz -bamout ${SAMPLE_ID}.out.bam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/ref s3://YOUR-BUCKET-NAME/samples" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "REFERENCE_NAME", 17 | "value": "Homo_sapiens_assembly38" 18 | }, 19 | { 20 | "name": "JOB_OUTPUTS", 21 | "value": "*.out.bam *.vcf.gz" 22 | }, 23 | { 24 | "name": "JOB_OUTPUT_PREFIX", 25 | "value": "s3://YOUR-BUCKET-NAME/output" 26 | }, 27 | { 28 | "name": "JOB_INPUT_S3_COPY_METHOD", 29 | "value": "s3sync" 30 | } 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-minimap2-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "minimap2", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "minimap2:1", 5 | "containerOverrides": { 6 | "vcpus": 8, 7 | "memory": 32000, 8 | "command": ["minimap2 -ax map-pb Homo_sapiens_assembly38.fasta SRR014820_1.fastq.gz > SRR014820.sam"], 9 | "environment": [{ 10 | "name": "JOB_INPUTS", 11 | "value": "s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_1.fastq.gz" 12 | }, 13 | { 14 | "name": "JOB_OUTPUTS", 15 | "value": "*.sam" 16 | }, 17 | { 18 | "name": "JOB_OUTPUT_PREFIX", 19 | "value": "s3://YOUR-BUCKET-NAME/output" 20 | } 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-picard-add-missing-groups.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "picard-add-missing-groups", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "picardAddMissingGroups:1", 5 | "containerOverrides": { 6 | "command": ["java -jar /usr/picard/picard.jar AddOrReplaceReadGroups", 7 | " -I ${SAMPLE_ID}.bam -O ${SAMPLE_ID}.rg.bam -RGID 4 --RGLB lib1 ", 8 | " --RGPL ILLUMINA --RGPU unit1 --RGSM 20;", 9 | " mv ${SAMPLE_ID}.rg.bam ${SAMPLE_ID}.bam;" 10 | ], 11 | "environment": [{ 12 | "name": "JOB_INPUTS", 13 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.bam" 14 | }, 15 | { 16 | "name": "SAMPLE_ID", 17 | "value": "SRR014820" 18 | }, 19 | { 20 | "name": "JOB_OUTPUTS", 21 | "value": "*.bam" 22 | }, 23 | { 24 | "name": "JOB_OUTPUT_PREFIX", 25 | "value": "s3://YOUR-BUCKET-NAME/output" 26 | } 27 | ] 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-samtools-index.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "samtoolsIndex", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "samtoolsIndex:1", 5 | "containerOverrides": { 6 | "command": ["samtools index ${SAMPLE_ID}.bam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.bam" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "JOB_OUTPUTS", 17 | "value": "*.bam *.bai" 18 | }, 19 | { 20 | "name": "JOB_OUTPUT_PREFIX", 21 | "value": "s3://YOUR-BUCKET-NAME/output" 22 | } 23 | ] 24 | } 25 | } 
-------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-samtools-sort.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "samtoolsSort", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "samtoolsSort:1", 5 | "containerOverrides": { 6 | "command": ["samtools sort -@ 4 -o ${SAMPLE_ID}.bam ${SAMPLE_ID}.sam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.sam" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "JOB_OUTPUTS", 17 | "value": "*.bam" 18 | }, 19 | { 20 | "name": "JOB_OUTPUT_PREFIX", 21 | "value": "s3://YOUR-BUCKET-NAME/output" 22 | } 23 | ] 24 | } 25 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ['/test'], 3 | testMatch: ['**/*.test.ts'], 4 | transform: { 5 | '^.+\\.tsx?$': 'ts-jest' 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/aws-genomics-cdk-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as s3 from "@aws-cdk/aws-s3"; 4 | import * as config from "../app.config.json"; 5 | import GenomicsVpcStack from "./vpc/vpc-stack"; 6 | import GenomicsBatchStack from "./batch/batch-stack"; 7 | 8 | //Workflows 9 | import { WorkflowConfig } from "./workflows/workflow-config"; 10 | import VariantCallingStateMachine from "./workflows/variant-calling-stack"; 11 | 12 | export class AwsGenomicsCdkStack extends cdk.Stack { 13 | constructor(scope: cdk.Construct, id: string, props: cdk.StackProps) { 14 | super(scope, id, props); 15 | 16 | // Create a new VPC or use an existing one 17 | let vpc: ec2.Vpc; 18 | if (config.VPC.createVPC) { 19 | vpc = new GenomicsVpcStack(this, config.VPC.VPCName, props).vpc; 20 | } else { 21 | vpc = ec2.Vpc.fromLookup(this, `${config.projectName}-vpc-lookup`, { 22 | vpcName: config.VPC.VPCName, 23 | }) as ec2.Vpc; 24 | } 25 | 26 | // Create a new bucket if set in the config 27 | if (!config.S3.existingBucket) { 28 | const bucketProps = { 29 | bucketName: config.S3.bucketName, 30 | encryption: s3.BucketEncryption.S3_MANAGED, 31 | removalPolicy: cdk.RemovalPolicy.RETAIN, 32 | }; 33 | 34 | new s3.Bucket(this, bucketProps.bucketName, bucketProps); 35 | } 36 | 37 | // Create an AWS Batch resources 38 | const batchProps = { 39 | stackProps: props, 40 | vpc: vpc, 41 | bucket: config.S3.bucketName, 42 | }; 43 | 44 | const batch = new GenomicsBatchStack( 45 | this, 46 | `${config.projectName}-batch`, 47 | batchProps 48 | ); 49 | 50 | // loop throgh the app.config workflows file and set infrastructure for 51 | // the provided workflows 52 | let workflow: WorkflowConfig; 53 | for (let i = 0; i < config.workflows.length; i++) { 54 | workflow = config.workflows[i] as WorkflowConfig; 55 | 56 | switch (workflow.name) { 57 | case "variantCalling": 58 | new VariantCallingStateMachine( 59 | this, 60 | `${config.projectName}-${workflow.name}`, 61 | { 62 | stackProps: props, 63 | batchQueue: 64 | workflow.spot === true 65 | ? 
batch.genomicsDefaultQueue 66 | : batch.genomicsHighPriorityQueue, 67 | taskRole: batch.taskRole, 68 | } 69 | ); 70 | break; 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-compute-environmnet-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ec2 from "@aws-cdk/aws-ec2"; 4 | import * as ecs from "@aws-cdk/aws-ecs"; 5 | import * as iam from "@aws-cdk/aws-iam"; 6 | import * as config from "../../app.config.json"; 7 | 8 | export class GenomicsComputeEnvironmentProps { 9 | readonly computeResourcesType?: batch.ComputeResourceType; 10 | readonly vpc: ec2.Vpc; 11 | readonly allocationStrategy?: batch.AllocationStrategy; 12 | readonly computeResourcesTags?: { [key: string]: string }; 13 | readonly instanceProfileArn: string; 14 | readonly fleetRole: iam.Role; 15 | readonly serviceRole: iam.Role; 16 | readonly instanceTypes: ec2.InstanceType[]; 17 | readonly launchTemplateName: string; 18 | readonly maxvCpus: number; 19 | readonly computeEnvironmentName: string; 20 | } 21 | 22 | export default class GenomicsComputeEnvironment extends cdk.Construct { 23 | public readonly computeEnvironment: batch.ComputeEnvironment; 24 | 25 | constructor( 26 | scope: cdk.Construct, 27 | id: string, 28 | props: GenomicsComputeEnvironmentProps 29 | ) { 30 | super(scope, id); 31 | 32 | const computeResources = { 33 | type: props.computeResourcesType ?? batch.ComputeResourceType.SPOT, 34 | vpc: props.vpc, 35 | allocationStrategy: 36 | props.allocationStrategy ?? 37 | batch.AllocationStrategy.SPOT_CAPACITY_OPTIMIZED, 38 | computeResourcesTags: props.computeResourcesTags ?? 
{ 39 | Name: `${config.projectName}-instance` 40 | }, 41 | image: ecs.EcsOptimizedImage.amazonLinux2(), 42 | instanceRole: props.instanceProfileArn, 43 | spotFleetRole: props.fleetRole, 44 | serviceRole: props.serviceRole, 45 | instanceTypes: props.instanceTypes, 46 | launchTemplate: { 47 | launchTemplateName: props.launchTemplateName, 48 | }, 49 | maxvCpus: props.maxvCpus, 50 | }; 51 | 52 | const computeEnvironmentProps = { 53 | computeEnvironmentName: props.computeEnvironmentName, 54 | enabled: true, 55 | managed: true, 56 | serviceRole: props.serviceRole, 57 | computeResources: computeResources, 58 | }; 59 | 60 | this.computeEnvironment = new batch.ComputeEnvironment( 61 | this, 62 | computeEnvironmentProps.computeEnvironmentName, 63 | computeEnvironmentProps 64 | ); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-iam-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as iam from "@aws-cdk/aws-iam"; 3 | import * as path from "path"; 4 | import * as fs from "fs"; 5 | import * as config from "../../app.config.json"; 6 | 7 | export interface GenomicsIamProps { 8 | readonly bucketName: string; 9 | readonly account: string; 10 | } 11 | 12 | export default class GenomicsIam extends cdk.Stack { 13 | public readonly serviceRole: iam.Role; 14 | public readonly taskRole: iam.Role; 15 | public readonly instanceProfileArn: string; 16 | public readonly fleetRole: iam.Role; 17 | 18 | constructor(scope: cdk.Construct, id: string, props: GenomicsIamProps) { 19 | super(scope, id); 20 | 21 | // Create a task role to be used by AWS batch container 22 | const taskRoleProps = { 23 | roleName: `${config.projectName}-ecs-task-role`, 24 | assumedBy: new iam.ServicePrincipal("ecs-tasks.amazonaws.com"), 25 | description: "allow ecs task to assume a role for the genomics pipleine", 26 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3ReadOnlyAccess")] 27 | }; 28 | 29 | this.taskRole = new iam.Role(this, taskRoleProps.roleName, taskRoleProps); 30 | 31 | 32 | // Create an instance role for the EC2 host machine for AWS Batch 33 | const instanceRoleProps = { 34 | roleName: `${config.projectName}-batch-instance-role`, 35 | assumedBy: new iam.ServicePrincipal("ec2.amazonaws.com"), 36 | description: "allow ec2 instance to assume a role for the genomics pipleine", 37 | managedPolicies: [ 38 | iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonEC2ContainerServiceforEC2Role"), 39 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3ReadOnlyAccess"), 40 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSSMManagedInstanceCore") 41 | ] 42 | }; 43 | 44 | const instanceRole = new iam.Role(this, instanceRoleProps.roleName, instanceRoleProps); 45 | 46 | 47 | // Create a spot fleet role to be used by AWS Batch when launching spot instances 48 | const fleetRoleProps = { 49 | roleName: `${config.projectName}-spot-fleet-role`, 50 | assumedBy: new iam.ServicePrincipal("ec2.amazonaws.com"), 51 | description: "allow ec2 instance to assume a role for the genomics pipleine", 52 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonEC2SpotFleetTaggingRole")] 53 | }; 54 | 55 | this.fleetRole = new iam.Role(this, fleetRoleProps.roleName, fleetRoleProps); 56 | 57 | 58 | // Create a service role for AWS Batch so it can assume other roles for the genomics pipeline 59 | const batchServiceRoleProps = 
{ 60 | roleName: `${config.projectName}-batch-service-role`, 61 | assumedBy: new iam.ServicePrincipal("batch.amazonaws.com"), 62 | description: "allow batch to assume a role for the genomics pipleine", 63 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AWSBatchServiceRole")] 64 | }; 65 | 66 | this.serviceRole = new iam.Role(this, batchServiceRoleProps.roleName, batchServiceRoleProps); 67 | 68 | 69 | // Create a policy to allow read and writes for an S3 bucket and add it to the task and instance roles 70 | const filePath = path.join(__dirname, "../../assets/genomics-policy-s3.json"); 71 | const bucketPolicy = fs.readFileSync(filePath, {encoding: "utf-8"}).replace(/BUCKET_NAME/g, props.bucketName); 72 | 73 | const policyProps = { 74 | policyName: `${config.projectName}-policy-s3`, 75 | document: iam.PolicyDocument.fromJson(JSON.parse(bucketPolicy)), 76 | force: true, 77 | roles: [this.taskRole, instanceRole] 78 | } 79 | const policy = new iam.Policy(this, policyProps.policyName, policyProps); 80 | 81 | 82 | // Create an instance profile to be used by AWS Batch compute environment 83 | const instanceProfileProps = { 84 | roles: [instanceRoleProps.roleName], 85 | instanceProfileName: `${config.projectName}-batch-instance-profile` 86 | }; 87 | const instanceProfile = new iam.CfnInstanceProfile(this, instanceProfileProps.instanceProfileName, instanceProfileProps); 88 | this.instanceProfileArn = `arn:aws:iam::${props.account}:instance-profile/${instanceProfileProps.instanceProfileName}`; 89 | } 90 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ec2 from "@aws-cdk/aws-ec2"; 4 | import * as iam from "@aws-cdk/aws-iam"; 5 | import GenomicsComputeEnvironment from "./batch-compute-environmnet-construct"; 6 | import GenomicsLaunchTemplate from "./launch-template-construct"; 7 | import GenomicsJobQueue from "./job-queue-construct"; 8 | import GenomicsIam from "./batch-iam-stack"; 9 | import * as config from "../../app.config.json"; 10 | 11 | 12 | export interface GenomicsBatchStackProps { 13 | readonly stackProps: cdk.StackProps; 14 | readonly vpc: ec2.Vpc; 15 | readonly bucket: string; 16 | }; 17 | 18 | export default class GenomicsBatchStack extends cdk.Stack { 19 | 20 | public readonly genomicsDefaultQueue: batch.JobQueue; 21 | public readonly genomicsHighPriorityQueue: batch.JobQueue; 22 | public readonly taskRole: iam.Role; 23 | 24 | constructor(scope: cdk.Construct, id: string, props: GenomicsBatchStackProps) { 25 | super(scope, id, props.stackProps); 26 | 27 | const env = props.stackProps.env as cdk.Environment; 28 | 29 | // Create IAM roles and policies for AWS Batch 30 | const genomicsIamProps = { 31 | bucketName: props.bucket, 32 | account: env.account as string 33 | } 34 | 35 | const genomicsIam = new GenomicsIam(this, `${config.projectName}-iam`, genomicsIamProps); 36 | this.taskRole = genomicsIam.taskRole; 37 | 38 | 39 | 40 | // Create a EC2 Launch Template to be used by AWS Batch 41 | const launchTemplateProps = { 42 | launchTemplateName: `${config.projectName}-launch-template`, 43 | volumeSize: config.batch.defaultVolumeSize 44 | }; 45 | 46 | const launchTemplate = new GenomicsLaunchTemplate(this, launchTemplateProps.launchTemplateName, launchTemplateProps); 47 | 48 | 49 | // Create AWS Batch 
SPOT and On-Demand compute environments 50 | let envInstanceType = []; 51 | for (let i = 0; i < config.batch.instanceTypes.length; i++) { 52 | envInstanceType.push(new ec2.InstanceType(config.batch.instanceTypes[i])); 53 | } 54 | 55 | // Create spot compute environment for the genomics pipeline using SPOT instances 56 | const spotComputeEnvironmentProps = { 57 | computeEnvironmentName: `${config.projectName}-spot-compute-environment`, 58 | vpc: props.vpc, 59 | instanceTypes: envInstanceType, 60 | maxvCpus: config.batch.spotMaxVCPUs, 61 | instanceProfileArn: genomicsIam.instanceProfileArn, 62 | fleetRole: genomicsIam.fleetRole, 63 | serviceRole: genomicsIam.serviceRole, 64 | launchTemplateName: launchTemplate.template.launchTemplateName as string, 65 | }; 66 | 67 | const spotComputeEnvironment = new GenomicsComputeEnvironment(this, 68 | spotComputeEnvironmentProps.computeEnvironmentName, 69 | spotComputeEnvironmentProps 70 | ); 71 | 72 | // Create on demand compute environment using on demand instances 73 | const onDemandComputeEnvironmentProps = { 74 | computeEnvironmentName: `${config.projectName}-on-demand-compute-environment`, 75 | computeResourcesType: batch.ComputeResourceType.ON_DEMAND, 76 | allocationStrategy: batch.AllocationStrategy.BEST_FIT, 77 | vpc: props.vpc, 78 | instanceTypes: envInstanceType, 79 | maxvCpus: config.batch.onDemendMaxVCPUs, 80 | instanceProfileArn: genomicsIam.instanceProfileArn, 81 | fleetRole: genomicsIam.fleetRole, 82 | serviceRole: genomicsIam.serviceRole, 83 | launchTemplateName: launchTemplate.template.launchTemplateName as string, 84 | }; 85 | 86 | const onDemandComputeEnvironment = new GenomicsComputeEnvironment(this, 87 | onDemandComputeEnvironmentProps.computeEnvironmentName, 88 | onDemandComputeEnvironmentProps 89 | ); 90 | 91 | 92 | // Create default queue, using spot first and then on-demand instances 93 | const defaultQueueProps = { 94 | computeEnvironments: [ 95 | spotComputeEnvironment.computeEnvironment 96 | ], 97 | jobQueueName: `${config.projectName}-default-queue`, 98 | priority: 100 99 | }; 100 | 101 | const defaultQueue = new GenomicsJobQueue(this, defaultQueueProps.jobQueueName, defaultQueueProps); 102 | this.genomicsDefaultQueue = defaultQueue.jobQueue; 103 | 104 | 105 | // Create high priority queue, using on-demand instances and then spot 106 | const highPriorityQueueProps = { 107 | computeEnvironments: [ 108 | onDemandComputeEnvironment.computeEnvironment, 109 | spotComputeEnvironment.computeEnvironment 110 | 111 | ], 112 | jobQueueName: `${config.projectName}-high-priority-queue`, 113 | priority: 1000 114 | } 115 | 116 | const highPriorityQueue = new GenomicsJobQueue(this, highPriorityQueueProps.jobQueueName, highPriorityQueueProps); 117 | this.genomicsHighPriorityQueue = highPriorityQueue.jobQueue; 118 | 119 | 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/job-queue-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | 4 | export interface GenomicsJobQueueProps { 5 | readonly computeEnvironments: batch.ComputeEnvironment[]; 6 | readonly jobQueueName: string; 7 | readonly priority: number; 8 | } 9 | 10 | export default class GenomicsJobQueue extends cdk.Construct { 11 | public readonly jobQueue: batch.JobQueue; 12 | 13 | constructor(scope: cdk.Construct, id: string, props: GenomicsJobQueueProps) { 14 | 
super(scope, id); 15 | 16 | let environments = []; 17 | for (let i = 0; i < props.computeEnvironments.length; i++) { 18 | let environment = { 19 | computeEnvironment: props.computeEnvironments[i], 20 | order: i + 1, 21 | }; 22 | 23 | environments.push(environment); 24 | } 25 | 26 | let jobQueueProps = { 27 | jobQueueName: props.jobQueueName, 28 | priority: props.priority, 29 | computeEnvironments: environments, 30 | }; 31 | 32 | this.jobQueue = new batch.JobQueue( 33 | this, 34 | jobQueueProps.jobQueueName, 35 | jobQueueProps 36 | ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/launch-template-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as path from "path"; 4 | import * as fs from "fs"; 5 | 6 | 7 | export interface GenomicsLaunchTemplateProps { 8 | readonly launchTemplateName: string; 9 | readonly volumeSize: number; 10 | readonly volumeType?: string; 11 | readonly encrypted?: boolean; 12 | readonly userData?: string; 13 | } 14 | 15 | export default class GenomicsLaunchTemplate extends cdk.Construct { 16 | public readonly template: ec2.CfnLaunchTemplate; 17 | 18 | constructor( 19 | scope: cdk.Construct, 20 | id: string, 21 | props: GenomicsLaunchTemplateProps 22 | ) { 23 | super(scope, id); 24 | 25 | let userData; 26 | 27 | if (props.userData !== undefined) { 28 | userData = props.userData; 29 | } else { 30 | const filePath = path.join( 31 | __dirname, 32 | "../../assets/launch_template_user_data.txt" 33 | ); 34 | userData = fs.readFileSync(filePath).toString("base64"); 35 | } 36 | 37 | const launchTemplateProps = { 38 | launchTemplateName: props.launchTemplateName, 39 | launchTemplateData: { 40 | blockDeviceMappings: [ 41 | { 42 | deviceName: "/dev/xvda", 43 | ebs: { 44 | encrypted: props.encrypted ?? true, 45 | volumeSize: props.volumeSize, 46 | volumeType: props.volumeType ?? 
"gp2", 47 | }, 48 | }, 49 | ], 50 | userData: userData, 51 | }, 52 | }; 53 | 54 | this.template = new ec2.CfnLaunchTemplate( 55 | this, 56 | props.launchTemplateName, 57 | launchTemplateProps 58 | ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/vpc/vpc-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as config from "../../app.config.json"; 4 | 5 | export default class GenomicsVpcStack extends cdk.Stack { 6 | public readonly vpc: ec2.Vpc; 7 | 8 | constructor(scope: cdk.Construct, id: string, props: cdk.StackProps) { 9 | super(scope, id, props); 10 | 11 | const subnetConf = [ 12 | { 13 | cidrMask: config.VPC.cidrMask, 14 | name: "private", 15 | subnetType: ec2.SubnetType.PRIVATE, 16 | }, 17 | { 18 | cidrMask: config.VPC.cidrMask, 19 | name: "public", 20 | subnetType: ec2.SubnetType.PUBLIC, 21 | } 22 | ]; 23 | 24 | const vpcProp = { 25 | cidr: config.VPC.cidr, 26 | maxAZs: config.VPC.maxAZs, 27 | subnetConfiguration: subnetConf 28 | }; 29 | 30 | this.vpc = new ec2.Vpc(this, config.VPC.VPCName, vpcProp); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/genomics-task-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as sfn from "@aws-cdk/aws-stepfunctions"; 4 | import * as tasks from "@aws-cdk/aws-stepfunctions-tasks"; 5 | 6 | export interface GenomicsTaskProps { 7 | readonly taskName: string; 8 | readonly command: string[]; 9 | readonly jobDefinition: batch.JobDefinition; 10 | readonly queue: batch.JobQueue; 11 | readonly awsCliPath?: string; 12 | readonly environment?: { [key: string]: string }; 13 | } 14 | 15 | export default class GenomicsTask extends cdk.Construct { 16 | 17 | public readonly task: tasks.BatchSubmitJob; 18 | 19 | constructor(scope: cdk.Construct, id: string, props: GenomicsTaskProps) { 20 | super(scope, id); 21 | 22 | const defaultEnvironment = { 23 | JOB_WORKFLOW_NAME: sfn.JsonPath.stringAt("$$.StateMachine.Name"), 24 | JOB_WORKFLOW_EXECUTION: sfn.JsonPath.stringAt("$$.Execution.Name"), 25 | JOB_OUTPUT_PREFIX: sfn.JsonPath.stringAt("$.params.environment.JOB_OUTPUT_PREFIX"), 26 | JOB_AWS_CLI_PATH: props.awsCliPath ?? 
"/opt/aws-cli/bin" 27 | } 28 | 29 | let environment; 30 | if(props.environment){ 31 | environment = {...defaultEnvironment, ...props.environment}; 32 | } 33 | else{ 34 | environment = defaultEnvironment; 35 | } 36 | 37 | const taskContainerProps = { 38 | command: props.command, 39 | environment: environment 40 | }; 41 | const taskProps = { 42 | jobName: props.taskName, 43 | jobDefinitionArn: props.jobDefinition.jobDefinitionArn, 44 | jobQueueArn: props.queue.jobQueueArn, 45 | containerOverrides: taskContainerProps, 46 | inputPath: "$", 47 | resultPath: "$.result" 48 | }; 49 | 50 | this.task = new tasks.BatchSubmitJob(this, taskProps.jobName, taskProps); 51 | } 52 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/job-definition-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ecs from "@aws-cdk/aws-ecs"; 4 | 5 | import {GenomicsJobDefinitionProps} from "./job-definitions"; 6 | 7 | export default class GenomicsJobDefinition extends cdk.Construct{ 8 | 9 | public readonly jobDefinition: batch.JobDefinition; 10 | 11 | constructor(scope: cdk.Construct, id: string, props: GenomicsJobDefinitionProps) { 12 | super(scope, id); 13 | 14 | const repositoryUri = `${props.env.account}.dkr.ecr.${props.env.region}.amazonaws.com/${props.repository}`; 15 | const containerImage = ecs.ContainerImage.fromRegistry(repositoryUri); 16 | 17 | const mountPoints = [ 18 | { 19 | containerPath: "/opt/aws-cli", 20 | readOnly: false, 21 | sourceVolume: "awscli" 22 | }, 23 | { 24 | containerPath: "/data", 25 | readOnly: false, 26 | sourceVolume: "data" 27 | } 28 | ]; 29 | 30 | const volumes = [ 31 | { 32 | name: "awscli", 33 | host: { sourcePath: "/opt/aws-cli" } 34 | }, 35 | { 36 | name: "data", 37 | host: { sourcePath: "/data" } 38 | } 39 | ]; 40 | 41 | const jobDefinitionContainerProps = { 42 | image: containerImage, 43 | jobRole: props.jobRole, 44 | memoryLimitMiB: props.memoryLimit, 45 | mountPoints: mountPoints, 46 | volumes: volumes, 47 | vcpus: props.vcpus ?? 1 48 | }; 49 | 50 | const jobDefinitionProps = { 51 | container: jobDefinitionContainerProps, 52 | jobDefinitionName: id, 53 | retryAttempts: props.retryAttempts ?? 1, 54 | timeout: cdk.Duration.seconds(props.timeout ?? 
3600) 55 | }; 56 | 57 | this.jobDefinition = new batch.JobDefinition(this, id, jobDefinitionProps); 58 | } 59 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/job-definitions.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as iam from "@aws-cdk/aws-iam"; 3 | 4 | export enum GenomicsJobDefinitionTypes { 5 | FASTQC = "fastqc", 6 | MINIMAP2 = "minimap2", 7 | GATK = "gatk", 8 | BWA = "bwa", 9 | SAMTOOLS = "samtools", 10 | PICARD = "picard" 11 | } 12 | 13 | export interface GenomicsJobDefinitionProps { 14 | readonly repository: string; 15 | readonly jobDefinitionName?: string; 16 | readonly retryAttempts?: number; 17 | readonly timeout?: number; 18 | readonly env: cdk.ResourceEnvironment; 19 | readonly stack: cdk.Stack; 20 | readonly jobRole: iam.Role; 21 | readonly memoryLimit?: number; 22 | readonly vcpus?: number; 23 | } 24 | 25 | export class JobDefinitionBase implements GenomicsJobDefinitionProps { 26 | public repository: string; 27 | public jobDefinitionName: string; 28 | public retryAttempts?: number; 29 | public timeout?: number; 30 | public env: cdk.ResourceEnvironment; 31 | public stack: cdk.Stack; 32 | public jobRole: iam.Role; 33 | public memoryLimit?: number; 34 | public vcpus?: number; 35 | 36 | constructor() { 37 | this.retryAttempts = 1; 38 | this.timeout = 3600; 39 | this.memoryLimit = 16000; 40 | this.vcpus = 8; 41 | } 42 | } 43 | 44 | export class FastQcJobDefinition extends JobDefinitionBase { 45 | constructor(props: GenomicsJobDefinitionProps) { 46 | super(); 47 | this.repository = props.repository; 48 | this.jobDefinitionName = GenomicsJobDefinitionTypes.FASTQC; 49 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 50 | this.timeout = props.timeout ?? this.timeout; 51 | this.env = props.env; 52 | this.stack = props.stack; 53 | this.jobRole = props.jobRole; 54 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 55 | this.vcpus = props.vcpus ?? this.vcpus; 56 | } 57 | } 58 | 59 | export class Minimap2JObDefinition extends JobDefinitionBase { 60 | constructor(props: GenomicsJobDefinitionProps) { 61 | super(); 62 | this.repository = props.repository; 63 | this.jobDefinitionName = GenomicsJobDefinitionTypes.MINIMAP2; 64 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 65 | this.timeout = props.timeout ?? this.timeout; 66 | this.env = props.env; 67 | this.stack = props.stack; 68 | this.jobRole = props.jobRole; 69 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 70 | this.vcpus = props.vcpus ?? this.vcpus; 71 | } 72 | } 73 | 74 | export class GatkJObDefinition extends JobDefinitionBase { 75 | constructor(props: GenomicsJobDefinitionProps) { 76 | super(); 77 | this.repository = props.repository; 78 | this.jobDefinitionName = GenomicsJobDefinitionTypes.GATK; 79 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 80 | this.timeout = props.timeout ?? this.timeout; 81 | this.env = props.env; 82 | this.stack = props.stack; 83 | this.jobRole = props.jobRole; 84 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 85 | this.vcpus = props.vcpus ?? this.vcpus; 86 | } 87 | } 88 | 89 | export class BwaJObDefinition extends JobDefinitionBase { 90 | constructor(props: GenomicsJobDefinitionProps) { 91 | super(); 92 | this.repository = props.repository; 93 | this.jobDefinitionName = GenomicsJobDefinitionTypes.BWA; 94 | this.retryAttempts = props.retryAttempts ?? 
this.retryAttempts; 95 | this.timeout = props.timeout ?? this.timeout; 96 | this.env = props.env; 97 | this.stack = props.stack; 98 | this.jobRole = props.jobRole; 99 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 100 | this.vcpus = props.vcpus ?? this.vcpus; 101 | } 102 | } 103 | 104 | export class SamToolsJObDefinition extends JobDefinitionBase { 105 | constructor(props: GenomicsJobDefinitionProps) { 106 | super(); 107 | this.repository = props.repository; 108 | this.jobDefinitionName = GenomicsJobDefinitionTypes.SAMTOOLS; 109 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 110 | this.timeout = props.timeout ?? this.timeout; 111 | this.env = props.env; 112 | this.stack = props.stack; 113 | this.jobRole = props.jobRole; 114 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 115 | this.vcpus = props.vcpus ?? this.vcpus; 116 | } 117 | } 118 | 119 | export class PicardJObDefinition extends JobDefinitionBase { 120 | constructor(props: GenomicsJobDefinitionProps) { 121 | super(); 122 | this.repository = props.repository; 123 | this.jobDefinitionName = GenomicsJobDefinitionTypes.PICARD; 124 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 125 | this.timeout = props.timeout ?? this.timeout; 126 | this.env = props.env; 127 | this.stack = props.stack; 128 | this.jobRole = props.jobRole; 129 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 130 | this.vcpus = props.vcpus ?? this.vcpus; 131 | } 132 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/workflow-config.ts: -------------------------------------------------------------------------------- 1 | export interface WorkflowConfig { 2 | readonly name: string; 3 | readonly spot: boolean; 4 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "aws-genomics-cdk", 3 | "version": "0.1.0", 4 | "bin": { 5 | "aws-genomics-cdk": "bin/aws-genomics-cdk.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@aws-cdk/assert": "1.128.0", 15 | "@types/node": "16.11.0", 16 | "aws-cdk": "1.128.0", 17 | "ts-node": "^10.3.0", 18 | "typescript": "~4.4.4" 19 | }, 20 | "dependencies": { 21 | "@aws-cdk/aws-batch": "^1.128.0", 22 | "@aws-cdk/aws-ec2": "^1.128.0", 23 | "@aws-cdk/aws-stepfunctions": "^1.128.0", 24 | "@aws-cdk/aws-stepfunctions-tasks": "^1.128.0", 25 | "@aws-cdk/core": "1.128.0", 26 | "source-map-support": "^0.5.20" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/test/aws-genomics-cdk.test.ts: -------------------------------------------------------------------------------- 1 | import { expect as expectCDK, matchTemplate, MatchStyle } from '@aws-cdk/assert'; 2 | import * as cdk from '@aws-cdk/core'; 3 | import * as AwsGenomicsCdk from '../lib/aws-genomics-cdk-stack'; 4 | 5 | test('Empty Stack', () => { 6 | const app = new cdk.App(); 7 | // WHEN 8 | const stack = new AwsGenomicsCdk.AwsGenomicsCdkStack(app, 'MyTestStack'); 9 | // THEN 10 | expectCDK(stack).to(matchTemplate({ 11 | "Resources": {} 12 | }, MatchStyle.EXACT)) 13 | }); 14 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/tsconfig.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": ["es2018"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false, 20 | "typeRoots": ["./node_modules/@types"], 21 | "resolveJsonModule": true 22 | }, 23 | "exclude": ["cdk.out"] 24 | } 25 | -------------------------------------------------------------------------------- /src/containers/.gitignore: -------------------------------------------------------------------------------- 1 | job-definition.json -------------------------------------------------------------------------------- /src/containers/_common/README.md: -------------------------------------------------------------------------------- 1 | # Common assets for tooling containers 2 | 3 | These are assets that are used to build all tooling containers. 4 | 5 | * `build.sh`: a generic build script that first builds a base image for a container, then builds an AWS specific image 6 | * `entrypoint.aws.sh`: a generic entrypoint script that wraps a call to a binary tool in the container with handlers data staging from/to S3 7 | -------------------------------------------------------------------------------- /src/containers/_common/aws.dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | FROM ${BASE_IMAGE}:latest 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y gettext-base 6 | RUN apt-get clean 7 | 8 | ENV PATH=/opt/bin:$PATH 9 | 10 | COPY _common/entrypoint.aws.sh /opt/bin/entrypoint.aws.sh 11 | RUN chmod +x /opt/bin/entrypoint.aws.sh 12 | 13 | WORKDIR /scratch 14 | 15 | ENTRYPOINT ["entrypoint.aws.sh"] 16 | -------------------------------------------------------------------------------- /src/containers/_common/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_NAME=$1 5 | IMAGE_TAG=$2 6 | 7 | echo "Docker Login to ECR" 8 | eval $(aws ecr get-login --no-include-email --region ${AWS_REGION}) 9 | 10 | # retrieve image layer cache from previously built build stage 11 | docker pull ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} || true 12 | 13 | # (re)build just the build stage of the image 14 | docker build \ 15 | --target build \ 16 | --cache-from ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} \ 17 | --build-arg VERSION=$IMAGE_TAG \ 18 | -t ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} . 19 | 20 | # build the base image 21 | docker build \ 22 | --cache-from ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} \ 23 | --build-arg VERSION=$IMAGE_TAG \ 24 | -t $IMAGE_NAME . 25 | 26 | # build the image with an AWS specific entrypoint 27 | docker build \ 28 | --build-arg BASE_IMAGE=$IMAGE_NAME \ 29 | -t $IMAGE_NAME:$IMAGE_TAG \ 30 | -t $IMAGE_NAME:latest \ 31 | -f _common/aws.dockerfile . 
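A hedged usage sketch for `build.sh` above, based only on the variables and arguments the script references (`REGISTRY`, `AWS_REGION`, and a positional image name and tag); the registry URI is a placeholder, and the working directory must be a Docker build context that contains both the tool's `Dockerfile` and the `_common/` assets, which may be prepared differently in the repository's own build pipeline:

```
# Placeholder values - substitute your own account, region, and ECR registry.
export AWS_REGION=us-east-1
export REGISTRY=123456789012.dkr.ecr.us-east-1.amazonaws.com

# Build the samtools tooling container, tagging it with the tool version.
bash _common/build.sh samtools 1.9
```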
-------------------------------------------------------------------------------- /src/containers/_common/push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_NAME=$1 5 | IMAGE_TAG=$2 6 | 7 | echo "Docker Login to ECR" 8 | eval $(aws ecr get-login --no-include-email --region ${AWS_REGION}) 9 | 10 | # # this script expects the image repository to be created by CFN stack prior to build 11 | # 12 | # # alternatively, you can create the image repository directly via the aws cli if it does not exist 13 | # aws ecr describe-repositories --repository-names ${IMAGE_NAME} \ 14 | # || aws ecr create-repository --repository-name ${IMAGE_NAME} 15 | # 16 | # # and add an appropriate lifecycle policy 17 | # lifecycle_policy=$(cat < $NF_CONFIG 45 | workDir = "$NF_WORKDIR" 46 | process.executor = "awsbatch" 47 | process.queue = "$NF_JOB_QUEUE" 48 | aws.batch.cliPath = "$AWS_CLI_PATH" 49 | EOF 50 | 51 | if [[ "$EFS_MOUNT" != "" ]] 52 | then 53 | echo aws.batch.volumes = [\"/mnt/efs\"] >> $NF_CONFIG 54 | fi 55 | 56 | echo "=== CONFIGURATION ===" 57 | cat ./nextflow.config 58 | 59 | # stage in session cache 60 | # .nextflow directory holds all session information for the current and past runs. 61 | # it should be `sync`'d with an s3 uri, so that runs from previous sessions can be 62 | # resumed 63 | echo "== Restoring Session Cache ==" 64 | aws s3 sync --no-progress $NF_LOGSDIR/.nextflow .nextflow 65 | 66 | function preserve_session() { 67 | # stage out session cache 68 | if [ -d .nextflow ]; then 69 | echo "== Preserving Session Cache ==" 70 | aws s3 sync --no-progress .nextflow $NF_LOGSDIR/.nextflow 71 | fi 72 | 73 | # .nextflow.log file has more detailed logging from the workflow run and is 74 | # nominally unique per run. 75 | # 76 | # when run locally, .nextflow.logs are automatically rotated 77 | # when syncing to S3 uniquely identify logs by the batch GUID 78 | if [ -f .nextflow.log ]; then 79 | echo "== Preserving Session Log ==" 80 | aws s3 cp --no-progress .nextflow.log $NF_LOGSDIR/.nextflow.log.${GUID/\//.} 81 | fi 82 | } 83 | 84 | function show_log() { 85 | echo "=== Nextflow Log ===" 86 | cat ./.nextflow.log 87 | } 88 | 89 | function cleanup() { 90 | set +e 91 | wait $NEXTFLOW_PID 92 | set -e 93 | echo "=== Running Cleanup ===" 94 | 95 | show_log 96 | preserve_session 97 | 98 | echo "=== Bye! ===" 99 | } 100 | 101 | function cancel() { 102 | # AWS Batch sends a SIGTERM to a container if its job is cancelled/terminated 103 | # forward this signal to Nextflow so that it can cancel any pending workflow jobs 104 | 105 | set +e # ignore errors here 106 | echo "=== !! CANCELLING WORKFLOW !! ===" 107 | echo "stopping nextflow pid: $NEXTFLOW_PID" 108 | kill -TERM "$NEXTFLOW_PID" 109 | echo "waiting .." 110 | wait $NEXTFLOW_PID 111 | echo "=== !! cancellation complete !! ===" 112 | set -e 113 | } 114 | 115 | trap "cancel; cleanup" TERM 116 | trap "cleanup" EXIT 117 | 118 | # stage workflow definition 119 | if [[ "$NEXTFLOW_PROJECT" =~ ^s3://.* ]]; then 120 | echo "== Staging S3 Project ==" 121 | aws s3 sync --no-progress --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT ./project 122 | NEXTFLOW_PROJECT=./project 123 | fi 124 | 125 | echo "== Running Workflow ==" 126 | echo "nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS" 127 | export NXF_ANSI_LOG=false 128 | nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS & 129 | 130 | NEXTFLOW_PID=$! 131 | echo "nextflow pid: $NEXTFLOW_PID" 132 | jobs 133 | echo "waiting .." 
134 | wait $NEXTFLOW_PID 135 | -------------------------------------------------------------------------------- /src/containers/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG VERSION=1.9 4 | 5 | # Metadata 6 | LABEL container.base.image="ubuntu:18.04" 7 | LABEL software.name="SAMtools" 8 | LABEL software.version=${VERSION} 9 | LABEL software.description="Utilities for the Sequence Alignment/Map (SAM/BAM/CRAM) formats" 10 | LABEL software.website="http://www.htslib.org" 11 | LABEL software.documentation="http://www.htslib.org/doc/samtools.html" 12 | LABEL software.license="MIT/Expat" 13 | LABEL tags="Genomics" 14 | 15 | # System and library dependencies 16 | RUN apt-get -y update && \ 17 | apt-get -y install \ 18 | autoconf \ 19 | automake \ 20 | make \ 21 | gcc \ 22 | perl \ 23 | zlib1g-dev \ 24 | libbz2-dev \ 25 | liblzma-dev \ 26 | libcurl4-gnutls-dev \ 27 | libssl-dev \ 28 | libncurses5-dev \ 29 | wget && \ 30 | apt-get clean 31 | 32 | # Application installation 33 | RUN wget -O /samtools-${VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${VERSION}/samtools-${VERSION}.tar.bz2 && \ 35 | tar xvjf /samtools-${VERSION}.tar.bz2 && rm /samtools-${VERSION}.tar.bz2 36 | 37 | WORKDIR /samtools-${VERSION} 38 | RUN ./configure && make 39 | 40 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 41 | COPY --from=build /samtools-*/samtools /usr/local/bin 42 | 43 | RUN apt-get -y update && \ 44 | apt-get -y install \ 45 | libcurl3-gnutls && \ 46 | apt-get clean 47 | 48 | ENTRYPOINT ["samtools"] 49 | -------------------------------------------------------------------------------- /src/ebs-autoscale/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Elastic Block Store Autoscale 2 | 3 | ## RELOCATION NOTICE 4 | 5 | The code for this daemon has been moved to the following repoository: 6 | [awslabs/amazon-ebs-autoscale](https://github.com/awslabs/amazon-ebs-autoscale) 7 | -------------------------------------------------------------------------------- /src/ebs-autoscale/bin/init-ebs-autoscale.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # this is a shim for backwards compatibility for releases <2.6.0 6 | # old steps: 7 | # - cd /opt && wget $artifactRootUrl/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz 8 | # - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh $scratchPath /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log 9 | sh /opt/ebs-autoscale/install.sh $@ -------------------------------------------------------------------------------- /src/ecs-additions/awscli-shim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This shim is for using the AWS ClI v2 with containers that do not have full glibc 4 | # it makes the shared libraries the AWS CLI v2 findable via LD_LIBRARY_PATH 5 | # 6 | # expect to be installed as /opt/aws-cli/bin/aws 7 | # expect to actually call /opt/aws-cli/dist/aws 8 | # expect that /opt/aws-cli is mapped to containers 9 | 10 | BIN_DIR=`dirname $0` 11 | DIST_DIR=`dirname $BIN_DIR`/dist 12 | AWS=$DIST_DIR/aws 13 | 14 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIST_DIR 15 | 16 | $AWS "$@" 17 | 18 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-common.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ecs config options 4 | # graceful shutdown of jobs on spot instances if spot is terminated 5 | echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config 6 | # cache already pulled container images and reduce network traffic 7 | echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config 8 | # increase docker stop timeout so that containers can perform cleanup actions 9 | echo ECS_CONTAINER_STOP_TIMEOUT=60 >> /etc/ecs/ecs.config 10 | # This variable specifies how frequently the automated image cleanup process should check for images to delete. The default is every 30 minutes but you can reduce this period to as low as 10 minutes to remove images more frequently. 11 | echo ECS_IMAGE_CLEANUP_INTERVAL=5m >> /etc/ecs/ecs.config 12 | # This variable specifies the minimum amount of time between when an image was pulled and when it may become a candidate for removal. This is used to prevent cleaning up images that have just been pulled. The default is 1 hour. 13 | echo ECS_IMAGE_MINIMUM_CLEANUP_AGE=60m >> /etc/ecs/ecs.config 14 | 15 | # add fetch and run batch helper script 16 | chmod a+x /opt/ecs-additions/fetch_and_run.sh 17 | cp /opt/ecs-additions/fetch_and_run.sh /usr/local/bin 18 | 19 | # add awscli-shim 20 | mv /opt/aws-cli/bin /opt/aws-cli/dist 21 | chmod a+x /opt/ecs-additions/awscli-shim.sh 22 | mkdir /opt/aws-cli/bin 23 | cp /opt/ecs-additions/awscli-shim.sh /opt/aws-cli/bin/aws # Used in Nextflow 24 | 25 | # Remove current symlink 26 | rm -f /usr/local/aws-cli/v2/current/bin/aws 27 | cp /opt/ecs-additions/awscli-shim.sh /usr/local/aws-cli/v2/current/bin/aws # Used in Cromwell 28 | 29 | # ensure that /usr/bin/aws points to the non-shimmed version 30 | ln -sf /usr/local/aws-cli/v2/current/dist/aws /usr/bin/aws 31 | 32 | # add 4GB of swap space 33 | dd if=/dev/zero of=/swapfile bs=128M count=32 34 | chmod 600 /swapfile 35 | mkswap /swapfile 36 | swapon /swapfile 37 | swapon -s 38 | echo '/swapfile swap swap defaults 0 0' >> /etc/fstab 39 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SECRET_EXIST=$( 4 | aws secretsmanager list-secrets \ 5 | --filters "Key=name,Values=cromwell/credentials/dockerhub" | jq '.SecretList | length > 0') 6 | 7 | if [[ "$SECRET_EXIST" = true ]]; then 8 | SECRET_STRING=$(aws secretsmanager get-secret-value --secret-id cromwell/credentials/dockerhub --query SecretString --output text) 9 | echo 'ECS_ENGINE_AUTH_TYPE=docker' >>/etc/ecs/ecs.config 10 | echo 'ECS_ENGINE_AUTH_DATA={"https://index.docker.io/v1/":'${SECRET_STRING}'}' >>/etc/ecs/ecs.config 11 | fi 12 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-nextflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Amazon.com, Inc. or its affiliates. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 21 | # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 | # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 23 | # THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | # POSSIBILITY OF SUCH DAMAGE. 31 | 32 | # yum install -y bzip2 wget 33 | # USER=/home/ec2-user 34 | 35 | # wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 36 | # bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $USER/miniconda 37 | # $USER/miniconda/bin/conda install -c conda-forge -y awscli 38 | 39 | # chown -R ec2-user:ec2-user $USER/miniconda 40 | 41 | # rm Miniconda3-latest-Linux-x86_64.sh 42 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-step-functions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Amazon.com, Inc. or its affiliates. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 21 | # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 | # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL 23 | # THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | # POSSIBILITY OF SUCH DAMAGE. 31 | 32 | # yum install -y bzip2 wget 33 | # PREFIX=/opt 34 | 35 | # wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 36 | # bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $PREFIX/miniconda 37 | # $PREFIX/miniconda/bin/conda install -c conda-forge -y awscli 38 | 39 | # chown -R ec2-user:ec2-user $PREFIX/miniconda 40 | 41 | # rm Miniconda3-latest-Linux-x86_64.sh 42 | -------------------------------------------------------------------------------- /src/ecs-additions/provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | OS="$(uname -r)" 7 | BASEDIR="$(dirname "${0}")" 8 | 9 | export OS 10 | 11 | # Expected environment variables 12 | GWFCORE_NAMESPACE=$1 13 | ARTIFACT_S3_ROOT_URL=$2 14 | # WORKFLOW_ORCHESTRATOR (OPTIONAL) 15 | 16 | printenv 17 | 18 | # start ssm-agent 19 | if [[ $OS =~ "amzn1" ]]; then 20 | start amazon-ssm-agent 21 | elif [[ $OS =~ "amzn2" ]]; then 22 | echo "Stopping and upgrading amazon ssm agent" 1>&2 23 | systemctl stop amazon-ssm-agent 24 | systemctl disable amazon-ssm-agent 25 | echo "Downloading latest version" 1>&2 26 | curl \ 27 | --output "amazon-ssm-agent.rpm" \ 28 | "https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm" 29 | echo "Upgrading ssm agent to latest version" 1>&2 30 | rpm \ 31 | --quiet \ 32 | --install \ 33 | --force \ 34 | --upgrade \ 35 | --replacepkgs \ 36 | "amazon-ssm-agent.rpm" 37 | echo "Re-enabling amazon ssm agent" 1>&2 38 | systemctl enable --output=verbose amazon-ssm-agent 39 | systemctl start --output=verbose amazon-ssm-agent 40 | echo "Cleaning up" 1>&2 41 | rm "amazon-ssm-agent.rpm" 42 | else 43 | echo "unsupported os: ${OS}" 44 | exit 100 45 | fi 46 | 47 | function ecs() { 48 | 49 | if [[ $OS =~ "amzn1" ]]; then 50 | # Amazon Linux 1 uses upstart for init 51 | case $1 in 52 | disable) 53 | stop ecs 54 | service docker stop 55 | ;; 56 | enable) 57 | service docker start 58 | start ecs 59 | ;; 60 | esac 61 | elif [[ $OS =~ "amzn2" ]]; then 62 | # Amazon Linux 2 uses systemd for init 63 | case $1 in 64 | disable) 65 | systemctl stop ecs 66 | systemctl stop docker 67 | ;; 68 | enable) 69 | systemctl start docker 70 | systemctl enable --now --no-block ecs # see: https://github.com/aws/amazon-ecs-agent/issues/1707 71 | ;; 72 | esac 73 | else 74 | echo "unsupported os: ${OS}" 75 | exit 100 76 | fi 77 | } 78 | 79 | # make sure that docker and ecs are running on script exit to avoid 80 | # zombie instances 81 | trap "ecs enable" INT ERR EXIT 82 | 83 | set +e 84 | ecs disable 85 | set -e 86 | 87 | ARTIFACT_S3_ROOT_URL=$(\ 88 | aws ssm get-parameter \ 89 | --name "/gwfcore/${GWFCORE_NAMESPACE}/installed-artifacts/s3-root-url" \ 90 | --query 'Parameter.Value' \ 91 | --output text \ 92 | ) 93 | 94 | ORCHESTRATOR_EXIST=$(\ 95 | aws ssm describe-parameters \ 96 | --filters "Key=Name,Values=/gwfcore/${GWFCORE_NAMESPACE}/orchestrator" | 
\ 97 | jq '.Parameters | length > 0' \ 98 | ) 99 | 100 | if [[ "$ORCHESTRATOR_EXIST" == "true" ]]; then 101 | WORKFLOW_ORCHESTRATOR=$(\ 102 | aws ssm get-parameter \ 103 | --name "/gwfcore/${GWFCORE_NAMESPACE}/orchestrator" \ 104 | --query 'Parameter.Value' \ 105 | --output text \ 106 | ) 107 | fi 108 | 109 | # retrieve and install amazon-ebs-autoscale 110 | cd /opt 111 | bash "${BASEDIR}/get-amazon-ebs-autoscale.sh" \ 112 | --install-version dist_release \ 113 | --artifact-root-url "${ARTIFACT_S3_ROOT_URL}" \ 114 | --file-system btrfs 115 | 116 | # common provisioning for all workflow orchestrators 117 | cd /opt 118 | bash "${BASEDIR}/ecs-additions-common.sh" 119 | 120 | # workflow specific provisioning if needed 121 | if [[ -n "$WORKFLOW_ORCHESTRATOR" ]]; then 122 | if [[ -f "$BASEDIR/ecs-additions-$WORKFLOW_ORCHESTRATOR.sh" ]]; then 123 | bash "$BASEDIR/ecs-additions-$WORKFLOW_ORCHESTRATOR.sh" 124 | fi 125 | fi 126 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflow CodeBuild 2 | 3 | This AWS CDK stack establishes an AWS CodePipeline that automatically keeps your account "GWF Core" infrastructure up to date with the 4 | latest release of the [aws-samples/aws-genomics-workflows](https://github.com/aws-samples/aws-genomics-workflows) templates 5 | and artifacts. 6 | 7 | The pipeline is triggered by a GitHub webhook that is triggered by "Push" events on the "release" branch of the 8 | aws-genomics-workflows repository. When triggered, it will clone the source code and build the templates and artifacts. 9 | It will then delete any existing "GWF core" Cloudformation deployed stacks and replace them with a new stack. By using 10 | a "delete and replace" strategy rather than an update we avoid issues where AWS Batch Compute Environments don't 11 | associate themselves with new versions of EC2 Launch Templates during an update. 12 | 13 | The pipeline doesn't create any workflow engine stacks, such as Cromwell or Nextflow, on top of the core, although 14 | it would be relatively easy to extend it for this purpose if required. 15 | 16 | ## PreRequisites 17 | 18 | ### GitHub OAuth token 19 | 20 | To set up the GitHub hook and allow cloning of the aws-genomics-workflow repository you will need a GitHub OAuth 21 | token with `Repo` and `admin:repo_hook` permissions. These should be stored in AWS Secrets Manager with the "secret name" 22 | `github-token`. 
23 | 24 | * To create the token, follow [these instructions](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) 25 | * To store the token using the AWS CLI: `aws secretsmanager create-secret --name github-token 26 | --description "GitHub OAuth Token" --secret-string "insert your GitHub OAuth token"` 27 | 28 | ### CDK 29 | 30 | To deploy this stack into your account, you need to install AWS CDK >= version 1.127.0, which itself requires Node.js 10.13.0 or later. 31 | 32 | To install CDK, type: 33 | 34 | ```shell 35 | npm install -g aws-cdk 36 | ``` 37 | 38 | If you have not already done so, your account and region need to be "bootstrapped" by CDK: 39 | 40 | ```shell 41 | cdk bootstrap aws://ACCOUNT-NUMBER/REGION 42 | ``` 43 | 44 | Full details can be found in the CDK [getting started guide](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html). 45 | 46 | ### AWS Account and Region 47 | 48 | CDK will deploy the code pipeline infrastructure into the account and region determined by your current AWS profile. 49 | 50 | ## Deployment 51 | 52 | To deploy the infrastructure into your account, simply type: 53 | 54 | ```shell 55 | cdk deploy 56 | ``` 57 | 58 | If you want to inspect the CloudFormation template that will be used for the deployment, you can print it to STDOUT with: 59 | 60 | ```shell 61 | cdk synth 62 | ``` 63 | 64 | ## Useful commands 65 | 66 | * `npm run build` compile TypeScript to JS 67 | * `npm run watch` watch for changes and compile 68 | * `npm run test` perform the jest unit tests 69 | * `cdk deploy` deploy this stack to your default AWS account/region 70 | * `cdk diff` compare deployed stack with current state 71 | * `cdk synth` emits the synthesized CloudFormation template 72 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/bin/aws-genomics-workflow-code-build.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import 'source-map-support/register'; 3 | import * as cdk from '@aws-cdk/core'; 4 | import { AwsGenomicsWorkflowCodeBuildStack } from '../lib/aws-genomics-workflow-code-build-stack'; 5 | 6 | const app = new cdk.App(); 7 | new AwsGenomicsWorkflowCodeBuildStack(app, 'AwsGenomicsWorkflowCodeBuildStack', { 8 | /* If you don't specify 'env', this stack will be environment-agnostic. 9 | * Account/Region-dependent features and context lookups will not work, 10 | * but a single synthesized template can be deployed anywhere. */ 11 | 12 | /* The next line specializes this stack for the AWS Account 13 | * and Region that are implied by the current CLI configuration. */ 14 | env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, 15 | 16 | /* Uncomment the next line if you know exactly what Account and Region you 17 | * want to deploy the stack to.
*/ 18 | // env: { account: '123456789012', region: 'us-east-1' }, 19 | 20 | /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */ 21 | }); 22 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/aws-genomics-workflow-code-build.ts", 3 | "context": { 4 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 5 | "@aws-cdk/core:enableStackNameDuplicates": "true", 6 | "aws-cdk:enableDiffNoFail": "true", 7 | "@aws-cdk/core:stackRelativeExports": "true", 8 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, 9 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, 10 | "@aws-cdk/aws-kms:defaultKeyPolicies": true, 11 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, 12 | "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true, 13 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 14 | "@aws-cdk/aws-efs:defaultEncryptionAtRest": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ['/test'], 3 | testMatch: ['**/*.test.ts'], 4 | transform: { 5 | '^.+\\.tsx?$': 'ts-jest' 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/lib/aws-genomics-workflow-code-build-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from '@aws-cdk/core'; 2 | import * as codebuild from '@aws-cdk/aws-codebuild'; 3 | import * as s3 from '@aws-cdk/aws-s3'; 4 | import * as iam from '@aws-cdk/aws-iam'; 5 | import * as codepipeline from '@aws-cdk/aws-codepipeline'; 6 | import * as actions from '@aws-cdk/aws-codepipeline-actions'; 7 | import * as ec2 from '@aws-cdk/aws-ec2'; 8 | import * as regionInfo from '@aws-cdk/region-info'; 9 | 10 | 11 | export class AwsGenomicsWorkflowCodeBuildStack extends cdk.Stack { 12 | constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) { 13 | super(scope, id, props); 14 | 15 | const info = regionInfo.RegionInfo.get(this.region); 16 | const s3Endpoint = info.servicePrincipal("s3.amazonaws.com"); 17 | 18 | const vpc = new ec2.Vpc(this, "CromwellVPC", { 19 | maxAzs: 3, 20 | gatewayEndpoints: { 21 | S3: { 22 | service: ec2.GatewayVpcEndpointAwsService.S3, 23 | }, 24 | } 25 | }); 26 | 27 | // S3 bucket for storing templates and artifacts 28 | const artifactBucket = new s3.Bucket(this,"GWFArtifactsBucket", { 29 | encryption: s3.BucketEncryption.S3_MANAGED, 30 | }); 31 | 32 | // S3 bucket that Cromwell will use 33 | const gwfBucket = new s3.Bucket(this, "GWFCoreBucket", { 34 | encryption: s3.BucketEncryption.S3_MANAGED, 35 | }) 36 | 37 | // objects needed for the "Source" stage of the pipeline 38 | const gitHubToken: cdk.SecretValue = cdk.SecretValue.secretsManager("github-token") 39 | const sourceOutput = new codepipeline.Artifact(); 40 | const sourceAction = new actions.GitHubSourceAction({ 41 | actionName: "GitHub_Source", 42 | owner: 'aws-samples', 43 | repo: "aws-genomics-workflows", 44 | branch: "release", 45 | oauthToken: gitHubToken, 46 | output: sourceOutput, 47 | trigger: actions.GitHubTrigger.WEBHOOK 48 | }) 49 | 50 | // objects needed for the "Build" stage of the pipeline 51 | const 
buildOutput = new codepipeline.Artifact(); 52 | const project = new codebuild.Project(this, "GenomicsWorkflowBuildProject", { 53 | description: "Builds the templates and artifacts for aws-genomics-workflows", 54 | artifacts: codebuild.Artifacts.s3({ 55 | bucket: artifactBucket, 56 | packageZip: false, 57 | }), 58 | buildSpec: codebuild.BuildSpec.fromObject({ 59 | version: 0.2, 60 | phases: { 61 | build: { 62 | commands: [ 63 | "ls -alF", 64 | "bash _scripts/make-dist.sh --verbose", 65 | "ls -alF dist/", 66 | `aws s3 sync dist/ s3://${artifactBucket.bucketName}` 67 | ], 68 | }, 69 | }, 70 | artifacts: { 71 | "base-directory": "dist", 72 | files: "**/*", 73 | } 74 | }), 75 | environment: {buildImage: codebuild.LinuxBuildImage.AMAZON_LINUX_2_3}, 76 | concurrentBuildLimit: 1, 77 | timeout: cdk.Duration.minutes(15), 78 | }); 79 | project.addToRolePolicy(new iam.PolicyStatement({ 80 | effect: iam.Effect.ALLOW, 81 | actions: ["s3:Get*", "s3:Put*", "s3:List*"], 82 | resources: [`${artifactBucket.bucketArn}`, `${artifactBucket.bucketArn}/*`] 83 | })); 84 | const buildAction = new actions.CodeBuildAction({ 85 | actionName: "Build_Artifacts_And_Templates", 86 | project: project, 87 | input: sourceOutput, 88 | outputs: [ buildOutput ] 89 | }); 90 | 91 | //objects needed for the "Deploy" stage of the pipeline 92 | const deleteGWFCoreStackAction = new actions.CloudFormationDeleteStackAction({ 93 | actionName: "Delete_GWF_Core_Stack", 94 | stackName: "GWFCoreStack", 95 | adminPermissions: true, 96 | runOrder: 10, 97 | }); 98 | const createGWFCoreAction = new actions.CloudFormationCreateUpdateStackAction({ 99 | actionName: "Create_GWF_Core", 100 | stackName: "GWFCoreStack", 101 | adminPermissions: true, 102 | templatePath: buildOutput.atPath("templates/gwfcore/gwfcore-root.template.yaml"), 103 | parameterOverrides: { 104 | VpcId: vpc.vpcId, 105 | SubnetIds: vpc.privateSubnets.map(value => value.subnetId).join(","), 106 | ArtifactBucketName: artifactBucket.bucketName, 107 | TemplateRootUrl: `https://${artifactBucket.bucketName}.${s3Endpoint}/templates`, 108 | S3BucketName: gwfBucket.bucketName, 109 | ExistingBucket: "Yes", 110 | }, 111 | runOrder: 20, 112 | }); 113 | 114 | 115 | // the pipeline 116 | new codepipeline.Pipeline(this, 'AmazonGenomicsWorkflowPipeline', { 117 | pipelineName: 'AmazonGenomicsWorkflowPipeline', 118 | stages: [ 119 | { 120 | stageName: 'Source', 121 | actions: [ 122 | sourceAction, 123 | ], 124 | }, 125 | { 126 | stageName: 'Build', 127 | actions: [ 128 | buildAction 129 | ], 130 | }, 131 | { 132 | stageName: 'Deploy', 133 | actions: [ 134 | deleteGWFCoreStackAction, 135 | createGWFCoreAction, 136 | ], 137 | }, 138 | ], 139 | }); 140 | 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gwf-core-codepipeline", 3 | "version": "1.0.0", 4 | "dependencies": { 5 | "@aws-cdk/aws-codebuild": "^1.127.0", 6 | "@aws-cdk/aws-codepipeline": "^1.127.0", 7 | "@aws-cdk/aws-codepipeline-actions": "^1.127.0", 8 | "@aws-cdk/core": "^1.127.0", 9 | "source-map-support": "0.5.16" 10 | }, 11 | "bin": { 12 | "cdk-test": "bin/cdk-test.js" 13 | }, 14 | "scripts": { 15 | "build": "tsc", 16 | "watch": "tsc -w", 17 | "test": "jest --passWithNoTests", 18 | "cdk": "cdk" 19 | }, 20 | "devDependencies": { 21 | "@aws-cdk/assert": "^1.127.0", 22 | "@types/jest": "^26.0.10", 23 | "@types/node": "10.17.27", 24 | "jest": 
"^27.2.5", 25 | "ts-jest": "^26.2.0", 26 | "aws-cdk": "^1.127.0", 27 | "ts-node": "^9.0.0", 28 | "typescript": "~3.9.7" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": ["es2018"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false, 20 | "typeRoots": ["./node_modules/@types"] 21 | }, 22 | "exclude": ["cdk.out"] 23 | } 24 | -------------------------------------------------------------------------------- /src/lambda/codebuild/lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | from __future__ import print_function 15 | from crhelper import CfnResource 16 | import logging 17 | import boto3 18 | import time 19 | 20 | logger = logging.getLogger(__name__) 21 | # Initialise the helper, all inputs are optional, this example shows the defaults 22 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 23 | 24 | try: 25 | codebuild = boto3.client('codebuild') 26 | # pass 27 | except Exception as e: 28 | helper.init_failure(e) 29 | 30 | 31 | @helper.create 32 | def create(event, context): 33 | logger.info("Got Create") 34 | start_build_job(event, context) 35 | 36 | 37 | @helper.update 38 | def update(event, context): 39 | logger.info("Got Update") 40 | start_build_job(event, context) 41 | 42 | 43 | @helper.delete 44 | def delete(event, context): 45 | logger.info("Got Delete") 46 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 
47 | 48 | 49 | @helper.poll_create 50 | def poll_create(event, context): 51 | logger.info("Got Create poll") 52 | return check_build_job_status(event, context) 53 | 54 | 55 | @helper.poll_update 56 | def poll_update(event, context): 57 | logger.info("Got Update poll") 58 | return check_build_job_status(event, context) 59 | 60 | 61 | def handler(event, context): 62 | helper(event, context) 63 | 64 | 65 | def start_build_job(event, context, action='setup'): 66 | response = codebuild.start_build( 67 | projectName=event['ResourceProperties']['BuildProject'] 68 | ) 69 | logger.info(response) 70 | helper.Data.update({"JobID": response['build']['id']}) 71 | 72 | 73 | def check_build_job_status(event, context): 74 | code_build_project_name = event['ResourceProperties']['BuildProject'] 75 | 76 | if not helper.Data.get("JobID"): 77 | raise ValueError("Job ID missing in the polling event.") 78 | 79 | job_id = helper.Data.get("JobID") 80 | 81 | # 'SUCCEEDED' | 'FAILED' | 'FAULT' | 'TIMED_OUT' | 'IN_PROGRESS' | 'STOPPED' 82 | response = codebuild.batch_get_builds(ids=[job_id]) 83 | build_status = response['builds'][0]['buildStatus'] 84 | 85 | if build_status == 'IN_PROGRESS': 86 | logger.info(build_status) 87 | return None 88 | else: 89 | if build_status == 'SUCCEEDED': 90 | logger.info(build_status) 91 | return True 92 | else: 93 | msg = "Code Build job '{0}' in project '{1}' exited with a build status of '{2}'." \ 94 | .format(job_id, code_build_project_name, build_status) 95 | logger.info(msg) 96 | raise ValueError(msg) 97 | -------------------------------------------------------------------------------- /src/lambda/codebuild/requirements.txt: -------------------------------------------------------------------------------- 1 | crhelper 2 | -------------------------------------------------------------------------------- /src/lambda/ecr/lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. 
* 12 | # *********************************************************************************************************************/ 13 | 14 | from time import sleep 15 | 16 | import boto3 17 | import cfnresponse 18 | 19 | 20 | send, SUCCESS, FAILED = ( 21 | cfnresponse.send, 22 | cfnresponse.SUCCESS, 23 | cfnresponse.FAILED 24 | ) 25 | ecr = boto3.client('ecr') 26 | 27 | 28 | def wait(repo, until): 29 | until = until.lower() 30 | if until == "deleted": 31 | while True: 32 | try: 33 | sleep(1) 34 | ecr.describe_repositories(repositoryNames=[repo]) 35 | except ecr.exceptions.RepositoryNotFoundException: 36 | break 37 | 38 | if until == "exists": 39 | exists = False 40 | while not exists: 41 | try: 42 | sleep(1) 43 | exists = ecr.describe_repositories(repositoryNames=[repo])["repositories"] 44 | break 45 | except ecr.exceptions.RepositoryNotFoundException: 46 | exists = False 47 | 48 | 49 | 50 | def put_lifecycle_policy(repo, props): 51 | if props.get("LifecyclePolicy"): 52 | ecr.put_lifecycle_policy( 53 | repositoryName=repo, 54 | lifecyclePolicyText=props["LifecyclePolicy"]["LifecyclePolicyText"] 55 | ) 56 | 57 | 58 | def create(repo, props, event, context): 59 | # use existing repository if available, otherwise create 60 | try: 61 | ecr.create_repository(repositoryName=repo) 62 | wait(repo, "exists") 63 | put_lifecycle_policy(repo, props) 64 | 65 | except ecr.exceptions.RepositoryAlreadyExistsException: 66 | print(f"Repository '{repo}' already exists - CREATE ECR repository ignored") 67 | put_lifecycle_policy(repo, props) 68 | 69 | except Exception as e: 70 | send(event, context, FAILED, None) 71 | raise(e) 72 | 73 | 74 | def update(repo, props, event, context): 75 | # use existing repository if available 76 | update_policy = props.get("UpdateReplacePolicy") 77 | try: 78 | if update_policy and update_policy.lower() == "retain": 79 | put_lifecycle_policy(repo, props) 80 | else: 81 | # replace the repo 82 | delete(repo, props, event, context) 83 | create(repo, props, event, context) 84 | except Exception as e: 85 | send(event, context, FAILED, None) 86 | raise(e) 87 | 88 | 89 | def delete(repo, props, event, context): 90 | # retain repository if specified 91 | # otherwise force delete 92 | delete_policy = props.get("DeletePolicy") 93 | try: 94 | if delete_policy and not delete_policy.lower() == "retain": 95 | ecr.delete_repository(repositoryName=repo, force=True) 96 | wait(repo, "deleted") 97 | 98 | except Exception as e: 99 | send(event, context, FAILED, None) 100 | raise(e) 101 | 102 | 103 | def handler(event, context): 104 | props = event["ResourceProperties"] 105 | repo = props.get("RepositoryName") 106 | 107 | if event["RequestType"] in ("Create", "Update", "Delete"): 108 | action = globals()[event["RequestType"].lower()] 109 | action(repo, props, event, context) 110 | send(event, context, SUCCESS, None) 111 | else: 112 | # unhandled request type 113 | send(event, context, FAILED, None) -------------------------------------------------------------------------------- /src/lambda/ecr/requirements.txt: -------------------------------------------------------------------------------- 1 | #crhelper 2 | cfnresponse -------------------------------------------------------------------------------- /src/templates/README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS CloudFormation templates 2 | 3 | Contained herein are CloudFormation templates for creating AWS resources for working with large-scale biomedical data - e.g. genomics. 
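As a point of reference, the core stack described in the next section can also be deployed directly with the AWS CLI once the templates and artifacts have been built (e.g. with `_scripts/make-dist.sh`) and uploaded to an S3 bucket. The following is only a sketch: the parameter names are the ones the CodePipeline stack in this repository passes to `gwfcore-root.template.yaml`; the bucket names, VPC, and subnet IDs are placeholders, and the root template may require additional parameters (such as a namespace), so check its `Parameters` section before deploying.

```shell
# Sketch only: assumes dist/ has been synced to s3://YOUR-ARTIFACT-BUCKET and that
# the stack is allowed to create IAM roles (hence the capability flags).
aws cloudformation create-stack \
    --stack-name gwfcore \
    --template-url https://YOUR-ARTIFACT-BUCKET.s3.amazonaws.com/templates/gwfcore/gwfcore-root.template.yaml \
    --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM \
    --parameters \
        ParameterKey=VpcId,ParameterValue=vpc-0123456789abcdef0 \
        'ParameterKey=SubnetIds,ParameterValue=subnet-aaaa1111\,subnet-bbbb2222' \
        ParameterKey=ArtifactBucketName,ParameterValue=YOUR-ARTIFACT-BUCKET \
        ParameterKey=TemplateRootUrl,ParameterValue=https://YOUR-ARTIFACT-BUCKET.s3.amazonaws.com/templates \
        ParameterKey=S3BucketName,ParameterValue=YOUR-DATA-BUCKET \
        ParameterKey=ExistingBucket,ParameterValue=No
```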
4 | 5 | ## Core Stack 6 | 7 | Templates in `gwfcore` are the "core" stack. The root template is: 8 | 9 | | File | Description | 10 | | :--- | :---------- | 11 | | `gwfcore-root.template.yaml` | Root stack that invokes nested stacks (see below) | 12 | 13 | Nested stacks are as follows and listed in order of creation: 14 | 15 | | File | Description | 16 | | :--- | :---------- | 17 | | `gwfcore-s3.template.yaml` | Creates an S3 bucket for storing installed artifacts and workflow input and output data | 18 | | `gwfcore-code.template.yaml` | Creates and installs code and artifacts used to run subsequent templates and provision EC2 instances | 19 | | `gwfcore-launch-template.template.yaml` | Creates an EC2 Launch Template used in AWS Batch Compute Environments | 20 | | `gwfcore-iam.template.yaml` | Creates IAM roles for AWS Batch resources | 21 | | `gwfcore-batch.template.yaml` | Creates AWS Batch Job Queues and Compute Environments for job execution | 22 | 23 | ## Optional Stacks 24 | | File | Description | 25 | | :--- | :---------- | 26 | | `gwfcore-fsx.template.yaml` | Creates an FSx for Lustre file system (Persistent 1 type only) mapped to the S3 bucket for storing workflow input, output and reference data. Refer to the Note section at the bottom. | 27 | | `gwfcore-efs.template.yaml` | Creates an EFS file system for storing workflow input, output and reference data | 28 | 29 | ## Orchestration Stacks 30 | 31 | The following stacks provide solutions that utilize: 32 | 33 | * AWS Step Functions 34 | * Cromwell 35 | * Nextflow 36 | 37 | They build atop the Core Stack above and provide the additional resources needed to run each orchestrator. 38 | 39 | | File | Description | 40 | | :--- | :---------- | 41 | | `step-functions/sfn-resources.template.yaml` | Creates an example AWS Step Functions state machine and containers for an example genomics workflow using BWA, samtools, and bcftools. | 42 | | `cromwell/cromwell-resources.template.yaml` | Creates an EC2 instance with Cromwell pre-installed and launched in "server" mode, plus an RDS Aurora Serverless database | 43 | | `nextflow/nextflow-resources.template.yaml` | Creates a Nextflow container and AWS Batch Job Definition for running Nextflow | 44 | 45 | 46 | Note: As Systems Manager Parameter Store is used, make sure to increase its throughput limit from the console. To do so, go to: 47 | AWS Systems Manager -> Parameter Store -> Settings -> Parameter Store throughput -> select the paid tier/higher throughput limit. -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-efs.template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: >- 4 | (WWPS-GLS-WF-GWFCORE-EFS) Creates an EFS file system and mount targets in a list of subnets 5 | 6 | Parameters: 7 | VpcId: 8 | Type: AWS::EC2::VPC::Id 9 | Description: 'The VPC to create security groups and deploy AWS Batch to. NOTE: Must be the same VPC as the provided subnet IDs.' 10 | SubnetIds: 11 | Type: List<AWS::EC2::Subnet::Id> 12 | Description: 'Subnets you want your batch compute environment to launch in. We recommend private subnets. NOTE: Must be from the VPC provided.' 13 | NumberOfSubnets: 14 | Type: Number 15 | Description: Number of subnets to launch into.
Should correspond to the length of Subnet Ids 16 | MinValue: 1 17 | MaxValue: 6 18 | 19 | Conditions: 20 | 2SubnetCondition: !Or 21 | - !Equals [!Ref 'NumberOfSubnets', '2'] 22 | - !Condition '3SubnetCondition' 23 | 3SubnetCondition: !Or 24 | - !Equals [!Ref 'NumberOfSubnets', '3'] 25 | - !Condition '4SubnetCondition' 26 | 4SubnetCondition: !Or 27 | - !Equals [!Ref 'NumberOfSubnets', '4'] 28 | - !Condition '5SubnetCondition' 29 | 5SubnetCondition: !Or 30 | - !Equals [!Ref 'NumberOfSubnets', '5'] 31 | - !Condition '6SubnetCondition' 32 | 6SubnetCondition: !Equals [!Ref NumberOfSubnets, '6'] 33 | 34 | Resources: 35 | SharedDataFileSystem: 36 | Type: AWS::EFS::FileSystem 37 | Properties: 38 | PerformanceMode: generalPurpose 39 | Encrypted: true 40 | FileSystemTags: 41 | - Key: Name 42 | Value: SharedDataGenomics 43 | 44 | MountTargetSecurityGroup: 45 | Type: AWS::EC2::SecurityGroup 46 | Properties: 47 | VpcId: !Ref VpcId 48 | GroupDescription: Security group for mount target 49 | SecurityGroupIngress: 50 | - IpProtocol: tcp 51 | FromPort: 2049 52 | ToPort: 2049 53 | CidrIp: 0.0.0.0/0 54 | 55 | MountTargetSubnet1: 56 | Type: AWS::EFS::MountTarget 57 | Properties: 58 | FileSystemId: !Ref SharedDataFileSystem 59 | SubnetId: !Select [0, !Ref SubnetIds] 60 | SecurityGroups: 61 | - !Ref MountTargetSecurityGroup 62 | 63 | MountTargetSubnet2: 64 | Type: AWS::EFS::MountTarget 65 | Condition: 2SubnetCondition 66 | Properties: 67 | FileSystemId: !Ref SharedDataFileSystem 68 | SubnetId: !Select [1, !Ref SubnetIds] 69 | SecurityGroups: 70 | - !Ref MountTargetSecurityGroup 71 | 72 | MountTargetSubnet3: 73 | Type: AWS::EFS::MountTarget 74 | Condition: 3SubnetCondition 75 | Properties: 76 | FileSystemId: !Ref SharedDataFileSystem 77 | SubnetId: !Select [2, !Ref SubnetIds] 78 | SecurityGroups: 79 | - !Ref MountTargetSecurityGroup 80 | 81 | MountTargetSubnet4: 82 | Type: AWS::EFS::MountTarget 83 | Condition: 4SubnetCondition 84 | Properties: 85 | FileSystemId: !Ref SharedDataFileSystem 86 | SubnetId: !Select [3, !Ref SubnetIds] 87 | SecurityGroups: 88 | - !Ref MountTargetSecurityGroup 89 | 90 | MountTargetSubnet5: 91 | Type: AWS::EFS::MountTarget 92 | Condition: 5SubnetCondition 93 | Properties: 94 | FileSystemId: !Ref SharedDataFileSystem 95 | SubnetId: !Select [4, !Ref SubnetIds] 96 | SecurityGroups: 97 | - !Ref MountTargetSecurityGroup 98 | 99 | MountTargetSubnet6: 100 | Type: AWS::EFS::MountTarget 101 | Condition: 6SubnetCondition 102 | Properties: 103 | FileSystemId: !Ref SharedDataFileSystem 104 | SubnetId: !Select [5, !Ref SubnetIds] 105 | SecurityGroups: 106 | - !Ref MountTargetSecurityGroup 107 | 108 | Outputs: 109 | EfsId: 110 | Value: !Ref SharedDataFileSystem 111 | Description: EFS ID 112 | 113 | -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-fsx.template.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: >- 3 | (WWPS-GLS-WF-GWFCORE-FSX) Creates FSx for Lustre file system 4 | 5 | Parameters: 6 | VpcId: 7 | Type: AWS::EC2::VPC::Id 8 | Description: The VPC to create security groups 9 | SubnetId: 10 | Type: String 11 | Description: "Subnet you want your FSx for lustre file system to launch in. Ensure Batch compute environment is also launched in that subnet only." 
12 | S3BucketName: 13 | Type: String 14 | AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" 15 | ConstraintDescription: "Must respect AWS naming conventions" 16 | Description: A S3 bucket name to mount on FSx 17 | FSxStorageType: 18 | Type: String 19 | Description: The type of FS needed i.e. SSD/HDD/SCRATCH, all capitals. 20 | Default: "SCRATCH" 21 | AllowedValues: 22 | - "SSD" 23 | - "HDD" 24 | - "SCRATCH" 25 | FSxStorageVolumeSize: 26 | Type: Number 27 | Default: 0 28 | Description: The initial size of the FSx volume to be used in GB. 0 will map to the minimum allowed size for this type of file system. 29 | FSxPerUnitStorageThroughput: 30 | Type: Number 31 | Default: 0 32 | Description: The throughput to be used for the storage, should be as provided, SSD - 50/100/200 mbps or HDD - 12/40. 0 will map to the minimum allowed throughput for this type of file system. 33 | 34 | Mappings: 35 | FSxTypeMap: 36 | SSD: 37 | DeploymentTypeString: "PERSISTENT_1" 38 | StorageTypeString: "SSD" 39 | MinThroughput: 50 40 | MinStorageCap: 1200 41 | HDD: 42 | DeploymentTypeString: "PERSISTENT_1" 43 | StorageTypeString: "HDD" 44 | MinThroughput: 12 45 | MinStorageCap: 6000 46 | SCRATCH: 47 | DeploymentTypeString: "SCRATCH_2" 48 | StorageTypeString: "SSD" 49 | MinThroughput: "NA" 50 | MinStorageCap: 1200 51 | 52 | TagMap: 53 | default: 54 | architecture: "genomics-workflows" 55 | solution: "default" 56 | tags: 57 | - Key: "architecture" 58 | Value: "genomics-workflows" 59 | - Key: "solution" 60 | Value: "default" 61 | 62 | Conditions: 63 | ScratchCheck: !Equals [!Ref FSxStorageType, "SCRATCH"] 64 | TypeCheck: !Or [!Equals [!Ref FSxStorageType, "SCRATCH"], !Equals [!Ref FSxStorageType, "SSD"]] 65 | IsMinThroughput: !Equals [!Ref FSxPerUnitStorageThroughput, 0] 66 | IsMinStorageCapacity: !Equals [!Ref FSxStorageVolumeSize, 0] 67 | 68 | 69 | Resources: 70 | FSxSecurityGroup: 71 | Type: AWS::EC2::SecurityGroup 72 | Properties: 73 | GroupDescription: SG for FSx 74 | VpcId: 75 | Ref: VpcId 76 | Tags: 77 | - Key: Application 78 | Value: AWS-GENOMICS-WKF 79 | 80 | SGIngressTCP988: 81 | Type: AWS::EC2::SecurityGroupIngress 82 | Properties: 83 | Description: "Allow TCP Connections for this security group" 84 | GroupId: !Ref FSxSecurityGroup 85 | SourceSecurityGroupId: !Ref FSxSecurityGroup 86 | IpProtocol: tcp 87 | FromPort: 988 88 | ToPort: 988 89 | 90 | SGIngressTCP1021: 91 | Type: AWS::EC2::SecurityGroupIngress 92 | Properties: 93 | Description: "Allow TCP Connections for this security group" 94 | GroupId: !Ref FSxSecurityGroup 95 | SourceSecurityGroupId: !Ref FSxSecurityGroup 96 | IpProtocol: tcp 97 | FromPort: 1021 98 | ToPort: 1023 99 | 100 | FSxFileSystem: 101 | Type: AWS::FSx::FileSystem 102 | Properties: 103 | FileSystemType: "LUSTRE" 104 | LustreConfiguration: 105 | AutoImportPolicy: "NEW_CHANGED" 106 | DeploymentType: !FindInMap [FSxTypeMap, !Ref FSxStorageType, DeploymentTypeString] 107 | DriveCacheType: 108 | Fn::If: 109 | - TypeCheck 110 | - !Ref AWS::NoValue 111 | - "NONE" 112 | ExportPath: !Sub s3://${S3BucketName} 113 | ImportPath: !Sub s3://${S3BucketName} 114 | PerUnitStorageThroughput: 115 | Fn::If: 116 | - ScratchCheck 117 | - !Ref AWS::NoValue 118 | - Fn::If: 119 | - IsMinThroughput 120 | - !FindInMap [FSxTypeMap, !Ref FSxStorageType, MinThroughput] 121 | - !Ref FSxPerUnitStorageThroughput 122 | SecurityGroupIds: 123 | - !Ref FSxSecurityGroup 124 | StorageCapacity: 125 | Fn::If: 126 | - 
IsMinStorageCapacity 127 | - !FindInMap [FSxTypeMap, !Ref FSxStorageType, MinStorageCap] 128 | - !Ref FSxStorageVolumeSize 129 | StorageType: !FindInMap [FSxTypeMap, !Ref FSxStorageType, StorageTypeString] 130 | SubnetIds: [!Ref SubnetId] 131 | Tags: 132 | - Key: architecture 133 | Value: !FindInMap ["TagMap", "default", "architecture"] 134 | 135 | Outputs: 136 | FSxId: 137 | Value: !Ref FSxFileSystem 138 | Description: FSx ID 139 | FSxMount: 140 | Value: !GetAtt FSxFileSystem.LustreMountName 141 | Description: FSx Mount Name 142 | FSxSecurityGroupId: 143 | Description: The FSx Security Group 144 | Value: !Ref FSxSecurityGroup -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-s3.template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | AWSTemplateFormatVersion: 2010-09-09 3 | Description: >- 4 | (WWPS-GLS-WF-GWFCORE-S3) A S3 bucket for storing results from genomics analysis 5 | 6 | Mappings: 7 | TagMap: 8 | default: 9 | architecture: "genomics-workflows" 10 | solution: "default" 11 | tags: 12 | - Key: "architecture" 13 | Value: "genomics-workflows" 14 | - Key: "solution" 15 | Value: "default" 16 | 17 | Parameters: 18 | Namespace: 19 | Type: String 20 | Description: Namespace (e.g. project name) to use to label resources 21 | 22 | S3BucketName: 23 | Type: String 24 | Description: >- 25 | A S3 bucket name for storing analysis results. 26 | The bucket name must respect the S3 bucket naming conventions 27 | (can contain lowercase letters, numbers, periods and hyphens). 28 | If left blank a unique bucket name will be generated. 29 | AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" 30 | ConstraintDescription: "Must respect AWS naming conventions" 31 | ExistingBucket: 32 | Type: String 33 | Description: Does this bucket already exist? If not, it will be created. 34 | AllowedValues: 35 | - "Yes" 36 | - "No" 37 | Default: "No" 38 | 39 | Conditions: 40 | BucketDoesNotExist: !Equals [ !Ref ExistingBucket, "No" ] 41 | GenerateBucketName: !Equals [ !Ref S3BucketName, "" ] 42 | 43 | Resources: 44 | S3Bucket: 45 | Type: AWS::S3::Bucket 46 | Condition: BucketDoesNotExist 47 | DeletionPolicy: Retain 48 | UpdateReplacePolicy: Retain 49 | Properties: 50 | BucketName: 51 | Fn::If: 52 | - GenerateBucketName 53 | - !Sub gwfcore-${Namespace} 54 | - !Ref S3BucketName 55 | BucketEncryption: 56 | ServerSideEncryptionConfiguration: 57 | - ServerSideEncryptionByDefault: 58 | SSEAlgorithm: AES256 59 | Tags: 60 | - Key: architecture 61 | Value: !FindInMap ["TagMap", "default", "architecture"] 62 | 63 | Outputs: 64 | BucketName: 65 | Value: 66 | Fn::If: 67 | - BucketDoesNotExist 68 | - !Ref S3Bucket 69 | - !Ref S3BucketName 70 | BucketArn: 71 | Value: 72 | Fn::If: 73 | - BucketDoesNotExist 74 | - !GetAtt S3Bucket.Arn 75 | - !Sub arn:aws:s3:::${S3BucketName} 76 | ... 77 | --------------------------------------------------------------------------------