├── .cfnlintrc.yaml ├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── gh-pages.yml │ ├── stale.yml │ └── test.yml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _scripts ├── configure-deploy.sh ├── deploy.sh ├── make-dist.sh └── test.sh ├── docs ├── containers │ ├── container-examples.md │ └── container-introduction.md ├── core-env │ ├── build-custom-distribution.md │ ├── create-custom-compute-resources.md │ ├── create-iam-roles.md │ ├── create-s3-bucket.md │ ├── custom-deploy.md │ ├── images │ │ ├── aws-genomics-workflows-high-level-arch.png │ │ └── ebs-autoscale.png │ ├── introduction.md │ └── setup-aws-batch.md ├── disclaimer.md ├── extra.css ├── images │ ├── AWS_logo_RGB.svg │ ├── AWS_logo_RGB_REV.svg │ ├── AWS_logo_RGB_WHT.svg │ ├── aws-genomics-workflows-banner.png │ ├── cloudformation-launch-stack.png │ ├── custom-deploy-0.png │ ├── custom-deploy-1.png │ ├── genomics-workflow.png │ ├── nextflow-0.png │ ├── root-vpc-1.png │ ├── root-vpc-2.png │ ├── root-vpc-3.png │ ├── root-vpc-4.png │ └── root-vpc-5.png ├── index.md ├── install-cromwell │ ├── images │ │ ├── screen1.png │ │ ├── screen2.png │ │ ├── screen3.png │ │ ├── screen4.png │ │ └── screen5.png │ └── index.md ├── orchestration │ ├── cost-effective-workflows │ │ ├── cost-effective-workflows.md │ │ └── images │ │ │ ├── ClusterDashboard.png │ │ │ ├── ScreenShot1.png │ │ │ ├── ScreenShot1a.png │ │ │ ├── ScreenShot2.png │ │ │ ├── ScreenShot3.png │ │ │ └── TaskDashboard.png │ ├── cromwell │ │ ├── cromwell-examples.md │ │ ├── cromwell-overview.md │ │ ├── cromwell-trouble-shooting.md │ │ └── images │ │ │ └── cromwell-on-aws_infrastructure.png │ ├── nextflow │ │ ├── images │ │ │ ├── nextflow-on-aws-infrastructure.png │ │ │ └── nextflow-on-aws-infrastructure.xml │ │ ├── nextflow-overview.md │ │ └── nextflow-trouble-shooting.md │ ├── orchestration-intro.md │ └── step-functions │ │ ├── files │ │ └── example-state-machine.json │ │ ├── images │ │ ├── aws-sfn-genomics-workflow-arch.png │ │ ├── cfn-stack-outputs-statemachineinput.png │ │ ├── cfn-stack-outputs-tab.png │ │ ├── example-state-machine.png │ │ ├── sfn-batch-job-snippet.png │ │ ├── sfn-console-execution-inprogress.png │ │ ├── sfn-console-start-execution-dialog.png │ │ ├── sfn-console-start-execution.png │ │ ├── sfn-console-statemachine.png │ │ ├── sfn-example-mapping-state-machine.png │ │ └── step-functions-structures.png │ │ ├── step-functions-examples.md │ │ └── step-functions-overview.md └── quick-start.md ├── environment.yaml ├── main.py ├── mkdocs.yml ├── requirements.txt └── src ├── .gitignore ├── aws-genomics-cdk ├── .gitignore ├── .npmignore ├── README.md ├── app.config.json ├── assets │ ├── genomics-policy-s3.json │ └── launch_template_user_data.txt ├── bin │ └── aws-genomics-cdk.ts ├── cdk.json ├── containers │ ├── README.md │ ├── build.sh │ ├── bwa │ │ └── Dockerfile │ ├── entry.dockerfile │ ├── entrypoint.sh │ ├── fastqc │ │ └── Dockerfile │ ├── gatk │ │ └── Dockerfile │ ├── minimap2 │ │ └── Dockerfile │ ├── picard │ │ └── Dockerfile │ └── samtools │ │ └── Dockerfile ├── examples │ ├── README.md │ ├── batch-bwa-job.json │ ├── batch-fastqc-job.json │ ├── batch-gatk-dictionary.json │ ├── batch-gatk-htc.json │ ├── batch-minimap2-job.json │ ├── batch-picard-add-missing-groups.json │ ├── batch-samtools-index.json │ └── batch-samtools-sort.json ├── jest.config.js ├── lib │ ├── aws-genomics-cdk-stack.ts │ ├── batch │ │ ├── batch-compute-environmnet-construct.ts │ │ ├── batch-iam-stack.ts │ │ ├── batch-stack.ts │ │ 
├── job-queue-construct.ts │ │ └── launch-template-construct.ts │ ├── vpc │ │ └── vpc-stack.ts │ └── workflows │ │ ├── genomics-task-construct.ts │ │ ├── job-definition-construct.ts │ │ ├── job-definitions.ts │ │ ├── variant-calling-stack.ts │ │ └── workflow-config.ts ├── package-lock.json ├── package.json ├── test │ └── aws-genomics-cdk.test.ts └── tsconfig.json ├── containers ├── .gitignore ├── _common │ ├── README.md │ ├── aws.dockerfile │ ├── build.sh │ ├── entrypoint.aws.sh │ └── push.sh ├── bcftools │ └── Dockerfile ├── buildspec-nextflow.yml ├── buildspec-workflow-tool.yml ├── bwa │ └── Dockerfile ├── nextflow │ ├── Dockerfile │ └── nextflow.aws.sh └── samtools │ └── Dockerfile ├── ebs-autoscale ├── README.md ├── bin │ └── init-ebs-autoscale.sh └── get-amazon-ebs-autoscale.sh ├── ecs-additions ├── awscli-shim.sh ├── ecs-additions-common.sh ├── ecs-additions-cromwell.sh ├── ecs-additions-nextflow.sh ├── ecs-additions-step-functions.sh ├── ecs-logs-collector.sh ├── fetch_and_run.sh └── provision.sh ├── gwf-core-codepipeline ├── .gitignore ├── .npmignore ├── README.md ├── bin │ └── aws-genomics-workflow-code-build.ts ├── cdk.json ├── jest.config.js ├── lib │ └── aws-genomics-workflow-code-build-stack.ts ├── package-lock.json ├── package.json └── tsconfig.json ├── lambda ├── codebuild │ ├── lambda.py │ └── requirements.txt └── ecr │ ├── lambda.py │ └── requirements.txt ├── scripts └── nf-aws.py └── templates ├── README.md ├── _common └── container-build.template.yaml ├── cromwell ├── cromwell-and-core.template.yaml └── cromwell-resources.template.yaml ├── gwfcore ├── gwfcore-batch.template.yaml ├── gwfcore-code.template.yaml ├── gwfcore-efs.template.yaml ├── gwfcore-fsx.template.yaml ├── gwfcore-iam.template.yaml ├── gwfcore-launch-template.template.yaml ├── gwfcore-root.template.yaml └── gwfcore-s3.template.yaml ├── nextflow ├── nextflow-and-core.template.yaml └── nextflow-resources.template.yaml └── step-functions ├── sfn-resources-state-machine.template.yaml └── sfn-resources.template.yaml /.cfnlintrc.yaml: -------------------------------------------------------------------------------- 1 | ignore_checks: 2 | - W3 -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
7 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Build docs and publish to github pages 2 | 3 | name: github pages 4 | 5 | on: 6 | workflow_dispatch: # Allow manual triggering of the action 7 | release: 8 | types: [published] 9 | 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-20.04 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | steps: 17 | - uses: actions/checkout@v2 18 | with: 19 | ref: 'master' # TODO: revert this to 'release' 20 | 21 | - name: Setup Python 22 | uses: actions/setup-python@v3.1.2 23 | with: 24 | python-version: 3.9 25 | 26 | - name: Install Python dependencies 27 | run: pip install -r requirements.txt 28 | 29 | # - name: Clean Docs 30 | # run: make clean-docs 31 | 32 | - name: Generate Docs 33 | run: mkdocs build 34 | 35 | - name: Deploy To Pages 36 | uses: peaceiris/actions-gh-pages@v3 # see https://github.com/peaceiris/actions-gh-pages for details 37 | with: 38 | github_token: ${{ secrets.GITHUB_TOKEN }} 39 | publish_branch: gh-pages # set to 'gh-pages to publish to github pages' 40 | publish_dir: ./site # Deploy the contents of ./docs to github pages -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '19 9 * * *' 11 | 12 | jobs: 13 | stale: 14 | 15 | runs-on: ubuntu-latest 16 | permissions: 17 | issues: write 18 | pull-requests: write 19 | 20 | steps: 21 | - uses: actions/stale@v5 22 | with: 23 | # Setting messages to an empty string will cause the automation to skip 24 | # that category 25 | ancient-issue-message: Greetings! Sorry to say but this is a very old issue that is probably not getting as much attention as it deserves. We encourage you to check if this is still an issue in the latest release and if you find that this is still a problem, please feel free to open a new one. 26 | stale-issue-message: Greetings! It looks like this issue hasn’t had any activity for over three months. We encourage you to check if this is still an issue in the latest release. Because it has been longer than three months since the last update on this, and in the absence of more information, we will be closing this issue soon. If you find that this is still a problem, please feel free to provide a comment or add an upvote to prevent automatic closure, or if the issue is already closed, please feel free to open a new one. 27 | stale-pr-message: Greetings! It looks like this PR hasn’t had any activity for over three months. Add a comment or an upvote to prevent automatic closure, or if the issue is already closed, please feel free to open a new one. 
28 | 29 | # These labels are required 30 | stale-issue-label: closing-soon 31 | exempt-issue-label: automation-exempt 32 | stale-pr-label: closing-soon 33 | exempt-pr-label: needs-review 34 | response-requested-label: response-requested 35 | 36 | # Don't set closed-for-staleness label to skip closing very old issues 37 | # regardless of label 38 | closed-for-staleness-label: closed-for-staleness 39 | 40 | # Issue timing 41 | days-before-stale: 1 42 | days-before-close: 1 43 | days-before-ancient: 90 44 | 45 | # If you don't want to mark a issue as being ancient based on a 46 | # threshold of "upvotes", you can set this here. An "upvote" is 47 | # the total number of +1, heart, hooray, and rocket reactions 48 | # on an issue. 49 | minimum-upvotes-to-exempt: 1 50 | 51 | repo-token: ${{ secrets.GITHUB_TOKEN }} 52 | loglevel: DEBUG 53 | # Set dry-run to true to not perform label or close actions. 54 | dry-run: false 55 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the master branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | test: 19 | runs-on: ubuntu-latest 20 | steps: 21 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 22 | - uses: actions/checkout@v3 23 | 24 | - name: Setup Python 25 | uses: actions/setup-python@v3.1.2 26 | with: 27 | python-version: 3.9 28 | 29 | - name: Get pip cache dir 30 | id: pip-cache 31 | run: | 32 | echo "::set-output name=dir::$(pip cache dir)" 33 | 34 | - name: pip cache 35 | uses: actions/cache@v3 36 | with: 37 | path: ${{ steps.pip-cache.outputs.dir }} 38 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 39 | restore-keys: | 40 | ${{ runner.os }}-pip- 41 | 42 | - name: Install Python dependencies 43 | run: pip install -r requirements.txt 44 | 45 | - name: Test 46 | run: bash _scripts/test.sh 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | /.idea/markdown-navigator.xml 3 | /.idea/markdown-navigator/profiles_settings.xml 4 | /.idea/misc.xml 5 | /.idea/modules.xml 6 | /.idea/vcs.xml 7 | /.idea/workspace.xml 8 | 9 | #********** osx template********** 10 | 11 | .DS_Store 12 | 13 | # Thumbnails 14 | ._* 15 | 16 | # Files that might appear on external disk 17 | .Spotlight-V100 18 | .Trashes 19 | 20 | 21 | #********** windows template********** 22 | 23 | # Windows image file caches 24 | Thumbs.db 25 | 26 | # Folder config file 27 | Desktop.ini 28 | 29 | # Recycle Bin used on file shares 30 | $RECYCLE.BIN/ 31 | 32 | 33 | #********** emacs template********** 34 | 35 | *~ 36 | \#*\# 37 | /.emacs.desktop 38 | /.emacs.desktop.lock 39 | .elc 40 | auto-save-list 41 | tramp 42 | .\#* 43 | 44 | # Org-mode 45 | .org-id-locations 46 | *_archive 47 | 48 | #********** repo specific ignores ********** 49 | 50 | tmp/ 51 | site/ 52 | artifacts/ 53 | *pem 54 | *tar.gz 55 | Makefile 56 | 
__pycache__ 57 | publish 58 | launch.sh 59 | LICENSE-* 60 | src/templates/tests 61 | /aws-genomics-workflows.iml 62 | _ignore 63 | dist/ 64 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | 4 | - "3.6" 5 | 6 | before_install: 7 | 8 | - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 9 | - chmod +x miniconda.sh 10 | - ./miniconda.sh -b -f 11 | - export PATH=$HOME/miniconda3/bin:$PATH 12 | - which conda 13 | - conda update --yes conda 14 | 15 | install: 16 | 17 | - conda env create --file environment.yaml 18 | - source activate mkdocs 19 | 20 | script: 21 | 22 | - bash _scripts/test.sh 23 | 24 | before_deploy: 25 | 26 | - pip install awscli --upgrade 27 | - bash _scripts/configure-deploy.sh --clobber 28 | 29 | deploy: 30 | - provider: script 31 | script: bash _scripts/deploy.sh --public --verbose production 32 | skip_cleanup: true 33 | on: 34 | repo: aws-samples/aws-genomics-workflows 35 | branch: release 36 | tags: true 37 | - provider: script 38 | script: bash _scripts/deploy.sh --public --verbose test 39 | skip_cleanup: true 40 | on: 41 | repo: aws-samples/aws-genomics-workflows 42 | branch: master -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws-samples/genomics-workflows/issues), or [recently closed](https://github.com/aws-samples/genomics-workflows/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/genomics-workflows/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws-samples/genomics-workflows/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS 2 | 3 | :warning: This site and related code are no longer actively maintained as of 2023-07-31. :warning: 4 | 5 | This allows all code and assets presented here to remain publicly available for historical reference purposes only. 6 | 7 | For more up to date solutions to running Genomics workflows on AWS checkout: 8 | 9 | - [Amazon Omics](https://aws.amazon.com/omics/) - a fully managed service for storing, processing, and querying genomic, transcriptomic, and other omics data into insights. [Omics Workflows](https://docs.aws.amazon.com/omics/latest/dev/workflows.html) provides fully managed execution of pre-packaged [Ready2Run](https://docs.aws.amazon.com/omics/latest/dev/service-workflows.html) workflows or private workflows you create using WDL or Nextflow. 10 | - [Amazon Genomics CLI](https://aws.amazon.com/genomics-cli/) - an open source tool that automates deploying and running workflow engines in AWS. AGC uses the same architectural patterns described here (i.e. operating workflow engines with AWS Batch). It provides support for running WDL, Nextflow, Snakemake, and CWL based workflows. 11 | 12 | --- 13 | 14 | This repository is the source code for [Genomics Workflows on AWS](). It contains markdown documents that are used to build the site as well as source code (CloudFormation templates, scripts, etc) that can be used to deploy AWS infrastructure for running genomics workflows. 15 | 16 | If you want to get the latest version of these solutions up and running quickly, it is recommended that you deploy stacks using the launch buttons available via the [hosted guide](). 17 | 18 | If you want to customize these solutions, you can create your own distribution using the instructions below. 19 | 20 | ## Creating your own distribution 21 | 22 | Clone the repo 23 | 24 | ```bash 25 | git clone https://github.com/aws-samples/aws-genomics-workflows.git 26 | ``` 27 | 28 | Create an S3 bucket in your AWS account to use for the distribution deployment 29 | 30 | ```bash 31 | aws s3 mb 32 | ``` 33 | 34 | Create and deploy a distribution from source 35 | 36 | ```bash 37 | cd aws-genomics-workflows 38 | bash _scripts/deploy.sh --deploy-region --asset-profile --asset-bucket s3:// test 39 | ``` 40 | 41 | This will create a `dist` folder in the root of the project with subfolders `dist/artifacts` and `dist/templates` that will be uploaded to the S3 bucket you created above. 42 | 43 | Use `--asset-profile` option to specify an AWS profile to use to make the deployment. 44 | 45 | **Note**: the region set for `--deploy-region` should match the region the bucket `` is created in. 46 | 47 | You can now use your deployed distribution to launch stacks using the AWS CLI. 
For example, to launch the GWFCore stack: 48 | 49 | ```bash 50 | TEMPLATE_ROOT_URL=https://.s3-.amazonaws.com/test/templates 51 | 52 | aws cloudformation create-stack \ 53 | --region \ 54 | --stack-name \ 55 | --template-url $TEMPLATE_ROOT_URL/gwfcore/gwfcore-root.template.yaml \ 56 | --capabilities CAPABILITY_IAM CAPABILITY_AUTO_EXPAND \ 57 | --parameters \ 58 | ParameterKey=VpcId,ParameterValue= \ 59 | ParameterKey=SubnetIds,ParameterValue=\",,...\" \ 60 | ParameterKey=ArtifactBucketName,ParameterValue= \ 61 | ParameterKey=TemplateRootUrl,ParameterValue=$TEMPLATE_ROOT_URL \ 62 | ParameterKey=S3BucketName,ParameterValue= \ 63 | ParameterKey=ExistingBucket,ParameterValue=false 64 | 65 | ``` 66 | 67 | ## Shared File System Support 68 | 69 | Amazon EFS is supported out of the box for `GWFCore` and `Nextflow`. You have two options to use EFS. 70 | 71 | 1. **Create a new EFS File System:** Be sure to have `CreateEFS` set to `Yes` and also include the total number of subnets. 72 | 2. **Use an Existing EFS File System:** Be sure to specify the EFS ID in the `ExistingEFS` parameter. This file system should be accessible from every subnet you specify. 73 | 74 | Following successful deployment of `GWFCore`, when creating your Nextflow Resources, set `MountEFS` to `Yes`. 75 | 76 | ## Building the documentation 77 | 78 | The documentation is built using mkdocs. 79 | 80 | Install dependencies: 81 | 82 | ```bash 83 | $ conda env create --file environment.yaml 84 | ``` 85 | 86 | This will create a `conda` environment called `mkdocs` 87 | 88 | Build the docs: 89 | 90 | ```bash 91 | $ conda activate mkdocs 92 | $ mkdocs build 93 | ``` 94 | 95 | ## License Summary 96 | 97 | This library is licensed under the MIT-0 License. See the LICENSE file. 98 | -------------------------------------------------------------------------------- /_scripts/configure-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a default ~/.aws/configure file for Travis testing 4 | 5 | set -e 6 | 7 | # This script expects the following environment variable(s) 8 | # ASSET_ROLE_ARN: the AWS role ARN that is used to publish assets 9 | 10 | usage() { 11 | cat <&2 37 | exit 1 38 | ;; 39 | *) # positional agruments 40 | PARAMS="$PARAMS $1" 41 | shift 42 | ;; 43 | esac 44 | done 45 | eval set -- "$PARAMS" 46 | 47 | if [ -z $CLOBBER ]; then 48 | while true; do 49 | read -p "Overwrite ~/.aws/config file [y/n]? " yn 50 | case $yn in 51 | [Yy]* ) CLOBBER=1; break;; 52 | [Nn]* ) echo "Exiting"; exit;; 53 | * ) echo "Please answer yes or no.";; 54 | esac 55 | done 56 | fi 57 | 58 | mkdir -p $HOME/.aws 59 | cat << EOF > $HOME/.aws/config 60 | [default] 61 | region = us-east-1 62 | output = json 63 | 64 | [profile asset-publisher] 65 | region = us-east-1 66 | role_arn = ${ASSET_ROLE_ARN} 67 | credential_source = Environment 68 | EOF 69 | 70 | cat $HOME/.aws/config -------------------------------------------------------------------------------- /_scripts/make-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make-dist.sh: Create distribution artifacts 4 | # This script is expected to be in a subdirectory of the top-level directory 5 | # It accesses the subdirectory 'src', and creates a subdirectory 'dist', in the top-level directory: 6 | # . 
7 | # |-_scripts 8 | # |---make-dist.sh 9 | # |-dist 10 | # |-src 11 | 12 | 13 | VERBOSE="" 14 | PARAMS="" 15 | while (( "$#" )); do 16 | case "$1" in 17 | --verbose) 18 | VERBOSE='-v' 19 | shift 20 | ;; 21 | --) # end optional argument parsing 22 | shift 23 | break 24 | ;; 25 | -*|--*=) 26 | echo "Error: unsupported argument $1" >&2 27 | exit 1 28 | ;; 29 | *) # positional agruments 30 | PARAMS="$PARAMS $1" 31 | shift 32 | ;; 33 | esac 34 | done 35 | eval set -- "$PARAMS" 36 | 37 | echo "checking for dependencies" 38 | 39 | DEPENDENCIES=$(cat < /dev/null && pwd )" 63 | INSTALL_DIR=$(dirname $DIR) 64 | SOURCE_PATH=$INSTALL_DIR/src 65 | DIST_PATH=$INSTALL_DIR/dist 66 | 67 | TEMP_PATH=$DIST_PATH/tmp 68 | ARTIFACT_PATH=$DIST_PATH/artifacts 69 | TEMPLATES_PATH=$DIST_PATH/templates 70 | 71 | if [ ! -d $DIST_PATH ]; then 72 | mkdir -p $DIST_PATH 73 | fi 74 | 75 | cd $DIST_PATH 76 | 77 | # clean up previous dist build 78 | echo "removing previous dist in $DIST_PATH" 79 | [ ! -z $DIR ] && rm -rf $DIST_PATH/* 80 | 81 | for d in $TEMP_PATH $ARTIFACT_PATH $TEMPLATES_PATH; do 82 | if [ ! -d $d ]; 83 | then 84 | echo "creating $d" 85 | mkdir -p $d 86 | fi 87 | done 88 | 89 | # package ebs-autoscale 90 | # combines the latest release of amazon-ebs-autoscale with compatibility shim 91 | # scripts in ./ebs-autoscale/ 92 | echo "packaging amazon-ebs-autoscale" 93 | cd $TEMP_PATH 94 | 95 | RESPONSE=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest") 96 | EBS_AUTOSCALE_VERSION=$(echo $RESPONSE | jq -r .tag_name) 97 | if [[ $EBS_AUTOSCALE_VERSION = 'null' ]]; then 98 | echo "ERROR: $RESPONSE" 99 | exit 1 100 | fi 101 | curl --silent -L \ 102 | "https://github.com/awslabs/amazon-ebs-autoscale/archive/${EBS_AUTOSCALE_VERSION}.tar.gz" \ 103 | -o ./amazon-ebs-autoscale.tar.gz 104 | 105 | echo "copying $(tar -tzf ./amazon-ebs-autoscale.tar.gz | wc -l) files from ebs-autoscale $EBS_AUTOSCALE_VERSION into tmp/amazon-ebs-autoscale/" 106 | tar $VERBOSE -xzf ./amazon-ebs-autoscale.tar.gz 107 | mv ./amazon-ebs-autoscale*/ ./amazon-ebs-autoscale 108 | echo $EBS_AUTOSCALE_VERSION > ./amazon-ebs-autoscale/VERSION 109 | 110 | echo "copying src/ebs-autoscale with $(find $SOURCE_PATH/ebs-autoscale/ -type f | wc -l) files to tmp/" 111 | cp $VERBOSE -Rf $SOURCE_PATH/ebs-autoscale . 112 | echo "copying $(find amazon-ebs-autoscale -type f | wc -l) files from tmp/amazon-ebs-autoscale/ to tmp/ebs-autoscale/" 113 | cp $VERBOSE -Rf ./amazon-ebs-autoscale/* ./ebs-autoscale/ 114 | echo "creating artifacts/aws-ebs-autoscale.tgz with $(find ./ebs-autoscale/ -type f | wc -l) files from tmp/ebs-autoscale/" 115 | tar $VERBOSE -czf $ARTIFACT_PATH/aws-ebs-autoscale.tgz ./ebs-autoscale/ 116 | 117 | # add a copy of the release tarball for naming consistency 118 | echo "creating artifacts/amazon-ebs-autoscale.tgz with $(find ./amazon-ebs-autoscale/ -type f | wc -l) files from tmp/amazon-ebs-autoscale/" 119 | tar $VERBOSE -czf $ARTIFACT_PATH/amazon-ebs-autoscale.tgz ./amazon-ebs-autoscale 120 | 121 | # add a retrieval script 122 | cp $VERBOSE -f $SOURCE_PATH/ebs-autoscale/get-amazon-ebs-autoscale.sh $ARTIFACT_PATH 123 | 124 | # package crhelper lambda(s) 125 | cd $SOURCE_PATH/lambda 126 | for fn in `ls .`; do 127 | echo "packaging crhelper lambda $fn" 128 | mkdir -p $TEMP_PATH/lambda/$fn 129 | cp $VERBOSE -R $SOURCE_PATH/lambda/$fn/. $TEMP_PATH/lambda/$fn 130 | 131 | cd $TEMP_PATH/lambda/$fn 132 | [ -z $VERBOSE ] && P_QUIET='--quiet' || P_QUIET='' 133 | pip $P_QUIET install -t . 
-r requirements.txt 134 | echo "creating artifacts/lambda-${fn}.zip with $(find . -type f | wc -l) files" 135 | [ -z $VERBOSE ] && Z_QUIET='-q' || Z_QUIET='' 136 | zip $Z_QUIET -r $ARTIFACT_PATH/lambda-$fn.zip . 137 | done 138 | 139 | # package ecs-additions 140 | echo "packaging ecs-additions" 141 | 142 | cd $TEMP_PATH 143 | mkdir -p $TEMP_PATH/ecs-additions 144 | cp $VERBOSE -R $SOURCE_PATH/ecs-additions/. $TEMP_PATH/ecs-additions 145 | 146 | # add the amazon-ebs-autoscale retrieval script to additions 147 | cp $VERBOSE $SOURCE_PATH/ebs-autoscale/get-amazon-ebs-autoscale.sh $TEMP_PATH/ecs-additions 148 | 149 | # keep tarball for backwards compatibilty 150 | cd $TEMP_PATH 151 | tar $VERBOSE -czf $ARTIFACT_PATH/aws-ecs-additions.tgz ./ecs-additions/ 152 | 153 | # zip file for codecommit repo 154 | cd $TEMP_PATH/ecs-additions/ 155 | zip $Z_QUIET -r $ARTIFACT_PATH/aws-ecs-additions.zip ./* 156 | 157 | 158 | # package container code 159 | echo "packaging container definitions with $(find $SOURCE_PATH/containers -type f | wc -l) files" 160 | cd $SOURCE_PATH/containers 161 | zip $Z_QUIET -r $ARTIFACT_PATH/containers.zip ./* 162 | 163 | 164 | # add templates to dist 165 | echo "copying $(find $SOURCE_PATH/templates/ -type f | wc -l) cloudformation templates" 166 | cp $VERBOSE -R $SOURCE_PATH/templates/. $TEMPLATES_PATH 167 | 168 | 169 | # cleanup 170 | echo "removing temp files" 171 | rm -rf $TEMP_PATH 172 | 173 | cd $CWD -------------------------------------------------------------------------------- /_scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # check cfn templates for errors 6 | cfn-lint --version 7 | cfn-lint src/templates/**/*.template.yaml 8 | 9 | # make sure that site can build 10 | mkdocs build -------------------------------------------------------------------------------- /docs/containers/container-examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/containers/container-examples.md -------------------------------------------------------------------------------- /docs/containers/container-introduction.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/containers/container-introduction.md -------------------------------------------------------------------------------- /docs/core-env/build-custom-distribution.md: -------------------------------------------------------------------------------- 1 | # Building Custom Resources 2 | 3 | {{ deprecation_notice() }} 4 | 5 | This section describes how to build and upload templates and artifacts to use in a customized deployment. Once uploaded, the locations of the templates and artifacts are used when deploying the Nextflow on AWS Batch solution (see [Customized Deployment](custom-deploy.md)) 6 | 7 | ## Building a Custom Distribution 8 | 9 | This step involves building a distribution of templates and artifacts from the solution's source code. 10 | 11 | First, create a local clone of the [Genomics Workflows on AWS](https://github.com/aws-samples/aws-genomics-workflows) source code. 
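For example, using the same commands shown in the project README:

```bash
# Clone the source and change into the project directory
git clone https://github.com/aws-samples/aws-genomics-workflows.git
cd aws-genomics-workflows
```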
The code base contains several directories: 12 | 13 | * `_scripts/`: Shell scripts for building and uploading the customized distribution of templates and artifacts 14 | * `docs/`: Source code for the documentation, written in [MarkDown](https://markdownguide.org) for the [MkDocs](https://mkdocs.org) publishing platform. This documentation may be modified, expanded, and contributed in the same way as source code. 15 | * `src/`: Source code for the components of the solution: 16 | * `containers/`: CodeBuild buildspec files for building AWS-specific container images and pushing them to ECR 17 | * `_common/` 18 | * `build.sh`: A generic build script that first builds a base image for a container, then builds an AWS specific image 19 | * `entrypoint.aws.sh`: A generic entrypoint script that wraps a call to a binary tool in the container with handlers data staging from/to S3 20 | * `nextflow/` 21 | * `Dockerfile` 22 | * `nextflow.aws.sh`: Docker entrypoint script to execute the Nextflow workflow on AWS Batch 23 | * `ebs-autoscale/` 24 | * `get-amazon-ebs-autoscale.sh`: Script to retrieve and install [Amazon EBS Autoscale](https://github.com/awslabs/amazon-ebs-autoscale) 25 | * `ecs-additions/`: Scripts to be installed on ECS host instances to support the distribution 26 | * `awscli-shim.sh`: Installed as `/opt/aws-cli/bin/aws` and mounted onto the container, allows container images without full glibc to use the AWS CLI v2 through supplied shared libraries (especially libz) and `LD_LIBRARY_PATH`. 27 | * `ecs-additions-common.sh`: Utility script to install `fetch_and_run.sh`, Nextflow and Cromwell shims, and swap space 28 | * `ecs-additions-cromwell-linux2-worker.sh`: 29 | * `ecs-additions-cromwell.sh`: 30 | * `ecs-additions-nextflow.sh`: 31 | * `ecs-additions-step-functions.sh`: 32 | * `fetch_and_run.sh`: Uses AWS CLI to download and run scripts and zip files from S3 33 | * `provision.sh`: Appended to the userdata in the launch template created by [gwfcore-launch-template](custom-deploy.md): Starts SSM Agent, ECS Agent, Docker; runs `get-amazon-ebs-autoscale.sh`, `ecs-additions-common.sh` and orchestrator-specific `ecs-additions-` scripts. 34 | * `lambda/`: Lambda functions to create, modify or delete ECR registries or CodeBuild jobs 35 | * `templates/`: CloudFormation templates for the solution stack, as described in [Customized Deployment](custom-deploy.md) 36 | 37 | ## Deploying a Custom Distribution 38 | 39 | The script `_scripts/deploy.sh` will create a custom distribution of artifacts and templates from files in the source tree, then upload this distribution to an S3 bucket. It will optionally also build and deploy a static documentation site from the Markdown documentation files. Its usage is: 40 | 41 | ```sh 42 | deploy.sh [--site-bucket BUCKET] [--asset-bucket BUCKET] 43 | [--asset-profile PROFILE] [--deploy-region REGION] 44 | [--public] [--verbose] 45 | STAGE 46 | 47 | --site-bucket BUCKET Deploy documentation site to BUCKET 48 | --asset-bucket BUCKET Deploy assets to BUCKET 49 | --asset-profile PROFILE Use PROFILE for AWS CLI commands 50 | --deploy-region REGION Deploy in region REGION 51 | --public Deploy to public bucket with '--acl public-read' (Default false) 52 | --verbose Display more output 53 | STAGE 'test' or 'production' 54 | ``` 55 | 56 | When running this script from the command line, use the value `test` for the stage. 
This will deploy the templates and artifacts into a directory `test` in your deployment bucket: 57 | 58 | ``` 59 | $ aws s3 ls s3://my-deployment-bucket/test/ 60 | PRE artifacts/ 61 | PRE templates/ 62 | ``` 63 | 64 | Use these values when deploying a customized installation, as described in [Customized Deployment](custom-deploy.md), sections 'Artifacts and Nested Stacks' and 'Nextflow'. In the example above, the values to use would be: 65 | 66 | * Artifact S3 Bucket Name: `my-deployment-bucket` 67 | * Artifact S3 Prefix: `test/artifacts` 68 | * Template Root URL: `https://my-deployment-bucket.s3.amazonaws.com/test/templates` 69 | 70 | The use of `production` for the stage is reserved for deployments from a Travis CI/CD environment; this usage will deploy into a subdirectory named after the current release tag. -------------------------------------------------------------------------------- /docs/core-env/create-s3-bucket.md: -------------------------------------------------------------------------------- 1 | # Core: Data Storage 2 | 3 | {{ deprecation_notice() }} 4 | 5 | You will need a robust location to store your input and output data. Genomics data files often equal or exceed 100GB per file. In addition to input sample files, genomics data processing typically relies on additional items like reference sequences or annotation databases that can be equally large. 6 | 7 | The following are key criteria for storing data for genomics workflows: 8 | 9 | * accessible to compute 10 | * secure 11 | * durable 12 | * capable of handling large files 13 | 14 | Amazon S3 buckets meet all of the above conditions. S3 also makes it easy to collaboratively work on such large datasets because buckets and the data stored in them are globally available. 15 | 16 | You can use an S3 bucket to store both your input data and workflow results. 17 | 18 | ## Create an S3 Bucket 19 | 20 | You can use an existing bucket for your workflows, or you can create a new one using the methods below. 21 | 22 | ### Automated via CloudFormation 23 | 24 | | Name | Description | Source | Launch Stack | 25 | | -- | -- | :--: | :--: | 26 | {{ cfn_stack_row("Amazon S3 Bucket", "GWFCore-S3", "gwfcore/gwfcore-s3.template.yaml", "Creates a secure Amazon S3 bucket to read from and write results to.", enable_cfn_button=False) }} 27 | 28 | !!! info 29 | The launch button has been disabled above since this template is part of a set of nested templates. It is not recommended to launch it independently of its intended parent stack. 30 | 31 | ### Manually via the AWS Console 32 | 33 | * Go to the S3 Console 34 | * Click on the "Create Bucket" button 35 | 36 | In the dialog that opens: 37 | 38 | * Provide a "Bucket Name". This needs to be globally unique. 39 | 40 | * Select the region for the bucket. Buckets are globally accessible, but the data resides on physical hardware within a specific region. It is best to choose a region that is closest to where you are and where you will launch compute resources to reduce network latency and avoid inter-region transfer costs. 41 | 42 | The default options for bucket configuration are sufficient for the majority of use cases. 43 | 44 | * Click the "Create" button to accept defaults and create the bucket.
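If you prefer to create the bucket from the command line, the following AWS CLI sketch creates a bucket with comparable settings. The bucket name and region are placeholders, and the encryption and public-access settings shown are illustrative; they approximate, but are not guaranteed to match, what the `gwfcore-s3` CloudFormation template configures.

```bash
# Placeholder values -- replace with your own globally unique bucket name and region
BUCKET=my-genomics-results-bucket
REGION=us-west-2

# Create the bucket
# (--create-bucket-configuration is required for all regions except us-east-1)
aws s3api create-bucket \
    --bucket "$BUCKET" \
    --region "$REGION" \
    --create-bucket-configuration LocationConstraint="$REGION"

# Turn on default (SSE-S3) encryption for all new objects
aws s3api put-bucket-encryption \
    --bucket "$BUCKET" \
    --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}'

# Block all public access to the bucket
aws s3api put-public-access-block \
    --bucket "$BUCKET" \
    --public-access-block-configuration BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true
```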
45 | -------------------------------------------------------------------------------- /docs/core-env/custom-deploy.md: -------------------------------------------------------------------------------- 1 | # Customized Deployment 2 | 3 | {{ deprecation_notice() }} 4 | 5 | Deployments of the 'Nextflow on AWS Batch' solution are based on nested CloudFormation templates, and on artifacts comprising scripts, software packages, and configuration files. The templates and artifacts are stored in S3 buckets, and their S3 URLs are used when launching the top-level template and as parameters to that template's deployment. 6 | 7 | ## VPC 8 | The quick start link deploys the [AWS VPC Quickstart](https://aws.amazon.com/quickstart/architecture/vpc/), which creates a VPC with up to 4 Availability Zones, each with a public subnet and a private subnet with NAT Gateway access to the Internet. 9 | 10 | ## Genomics Workflow Core 11 | This quick start link deploys the CloudFormation template `gwfcore-root.template.yaml` for the Genomics Workflow Core (GWFCore) from the [Genomics Workflows on AWS](https://github.com/aws-samples/aws-genomics-workflows) solution. This template launches a number of nested templates, as shown below: 12 | 13 | * Root Stack __gwfcore-root__ - Top level template for Genomics Workflow Core 14 | * S3 Stack __gwfcore-s3__ - S3 bucket (new or existing) for storing analysis results 15 | * IAM Stack __gwfcore-iam__ - Creates IAM roles to use with AWS Batch scalable genomics workflow environment 16 | * Code Stack __gwfcore-code__ - Creates AWS CodeCommit repos and CodeBuild projects for Genomics Workflows Core assets and artifacts 17 | * Launch Template Stack __gwfcore-launch-template__ - Creates an EC2 Launch Template for AWS Batch based genomics workflows 18 | * Batch Stack __gwfcore-batch__ - Deploys resource for a AWS Batch environment that is suitable for genomics, including default and high-priority JobQueues 19 | 20 | ### Root Stack 21 | The quick start solution links to the CloudFormation console, where the 'Amazon S3 URL' field is prefilled with the S3 URL of a copy of the root stack template, hosted in the public S3 bucket __aws-genomics-workflows__. 22 | 23 | custom-deploy-0 27 | 28 | To use a customized root stack, upload your modified stack template to an S3 bucket (see [Building a Custom Distribution](build-custom-distribution.md)), and specify that template's URL in 'Amazon S3 URL'. 29 | 30 | ### Artifacts and Nested Stacks 31 | The subsequent screen, 'Specify Stack Details', allows for customization of the deployed resources in the 'Distribution Configuration' section. 32 | 33 | custom-deploy-1 37 | 38 | * __Artifact S3 Bucket Name__ and __Artifact S3 Prefix__ define the location of the artifacts uploaded prior to this deployment. By default, pre-prepared artifacts are stored in the __aws-genomics-workflows__ bucket. 39 | * __Template Root URL__ defines the bucket and prefix used to store nested templates, called by the root template. 40 | 41 | To use your own modified artifacts or nested templates, build and upload as described in [Building a Custom Distribution](build-custom-distribution.md), and specify the bucket and prefix in the fields above. 
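If you prefer to supply these overrides from the AWS CLI instead of the console, the same fields map to stack parameters. The following is an illustrative sketch only: it assumes a custom distribution deployed with stage `test` (see [Building a Custom Distribution](build-custom-distribution.md)), reuses the parameter names from the launch example in the project README, and all region, bucket, VPC, and subnet values are placeholders you must replace with your own.

```bash
# Placeholder values -- point these at your own custom distribution and network
ASSET_BUCKET=my-deployment-bucket
TEMPLATE_ROOT_URL=https://${ASSET_BUCKET}.s3.amazonaws.com/test/templates

aws cloudformation create-stack \
    --region us-west-2 \
    --stack-name my-gwfcore \
    --template-url ${TEMPLATE_ROOT_URL}/gwfcore/gwfcore-root.template.yaml \
    --capabilities CAPABILITY_IAM CAPABILITY_AUTO_EXPAND \
    --parameters \
        ParameterKey=VpcId,ParameterValue=vpc-0123456789abcdef0 \
        ParameterKey=SubnetIds,ParameterValue=\"subnet-11111111,subnet-22222222\" \
        ParameterKey=ArtifactBucketName,ParameterValue=${ASSET_BUCKET} \
        ParameterKey=TemplateRootUrl,ParameterValue=${TEMPLATE_ROOT_URL} \
        ParameterKey=S3BucketName,ParameterValue=my-gwfcore-results-bucket \
        ParameterKey=ExistingBucket,ParameterValue=false
```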
42 | 43 | ## Workflow Orchestrators 44 | ### Nextflow 45 | This quick start deploys the Nextflow template `nextflow-resources.template.yaml`, which launches one nested stack: 46 | 47 | * Root Stack __nextflow-resources__ - Creates resources specific to running Nextflow on AWS 48 | * Container Build Stack __container-build__ - Creates resources for building a Docker container image using CodeBuild, storing the image in ECR, and optionally creating a corresponding Batch Job Definition 49 | 50 | The nextflow root stack is specified in the same way as the GWFCore root stack, above, and a location for a modified root stack may be specified as with the Core stack. 51 | 52 | The subsequent 'Specify Stack Details' screen has fields allowing the customization of the Nextflow deployment. 53 | 54 | nextflow-0 58 | 59 | * __S3NextflowPrefix__, __S3LogsDirPrefix__, and __S3WorkDirPrefix__ specify the path within the GWFCore bucket in which to store per-run data and log files. 60 | * __TemplateRootUrl__ specifies the path to the nested templates called by the Nextflow root template, as with the GWFCore root stack. 61 | -------------------------------------------------------------------------------- /docs/core-env/images/aws-genomics-workflows-high-level-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/core-env/images/aws-genomics-workflows-high-level-arch.png -------------------------------------------------------------------------------- /docs/core-env/images/ebs-autoscale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/core-env/images/ebs-autoscale.png -------------------------------------------------------------------------------- /docs/core-env/introduction.md: -------------------------------------------------------------------------------- 1 | # Core: Introduction 2 | 3 | {{ deprecation_notice() }} 4 | 5 | A high level view of the architecture you will need to run workflows is shown is below. 6 | 7 | ![high level architecture](images/aws-genomics-workflows-high-level-arch.png) 8 | 9 | This section of the guide details the common components required for job execution and data storage. This includes the following: 10 | 11 | * A place to store your input data and generated results 12 | * Access controls to your data and compute resources 13 | * Code and artifacts used to provision compute resources 14 | * Containerized task scheduling and execution 15 | 16 | The above is referred to here as the "Genomics Workflows Core". To launch this core in your AWS account, use the Cloudformation template below. 17 | 18 | | Name | Description | Source | Launch Stack | 19 | | -- | -- | :--: | :--: | 20 | {{ cfn_stack_row("Genomics Workflow Core", "gwfcore", "gwfcore/gwfcore-root.template.yaml", "Create EC2 Launch Templates, AWS Batch Job Queues and Compute Environments, a secure Amazon S3 bucket, and IAM policies and roles within an **existing** VPC. _NOTE: You must provide VPC ID, and subnet IDs_.") }} 21 | 22 | The core is agnostic of the workflow orchestrator you intended to use, and can be installed multiple times in your account if needed (e.g. for use by different projects). Each installation uses a `Namespace` value to group resources accordingly. 
By default, the `Namespace` is set to the stack name, which must be unique within an AWS region. 23 | 24 | !!! info 25 | To create all of the resources described, the Cloudformation template above uses [Nested Stacks](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-nested-stacks.html). This is a way to modularize complex stacks and enable reuse. The individual nested stack templates are intended to be run from a parent or "root" template. On the following pages, the individual nested stack templates are available for viewing only. 26 | -------------------------------------------------------------------------------- /docs/disclaimer.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The architectures and solutions presented in this guide are provided "as is" per the underlying [LICENSE](https://github.com/aws-samples/aws-genomics-workflows/blob/master/LICENSE). Before implementing anything described here in a production setting we recommended that you consult with your AWS account team regarding your specific requirements for performance, scalability, and security via a [Well Architected Review](https://aws.amazon.com/architecture/well-architected/). -------------------------------------------------------------------------------- /docs/extra.css: -------------------------------------------------------------------------------- 1 | .launch-button { 2 | display: block; 3 | 4 | color: white !important; 5 | background-color: #ec7211; 6 | 7 | border-radius: .25ex .25ex .25ex .25ex; 8 | border: 1px solid #eb5f07; 9 | 10 | text-align: center; 11 | text-decoration: none; 12 | 13 | /* padding: 0.5ex 2ex; */ 14 | width: 64px; 15 | 16 | line-height: 0.5; 17 | } 18 | 19 | .launch-button-disabled { 20 | opacity: 0.2; 21 | cursor: not-allowed; 22 | } 23 | 24 | .launch-button:hover, .launch-button:visited:hover { 25 | background-color: #eb5f07; 26 | color: white !important; 27 | } 28 | 29 | .launch-button:visited { 30 | color: white !important; 31 | } 32 | 33 | .material-icons { 34 | font-family: 'Material Icons'; 35 | font-weight: normal; 36 | font-style: normal; 37 | font-size: 24px; /* Preferred icon size */ 38 | display: inline-block; 39 | line-height: 1; 40 | text-transform: none; 41 | letter-spacing: normal; 42 | word-wrap: normal; 43 | white-space: nowrap; 44 | direction: ltr; 45 | 46 | /* Support for all WebKit browsers. */ 47 | -webkit-font-smoothing: antialiased; 48 | /* Support for Safari and Chrome. */ 49 | text-rendering: optimizeLegibility; 50 | 51 | /* Support for Firefox. */ 52 | -moz-osx-font-smoothing: grayscale; 53 | 54 | /* Support for IE. 
*/ 55 | font-feature-settings: 'liga'; 56 | } 57 | 58 | .md-header, .md-footer, .md-footer-nav, .md-footer-meta { 59 | background-color: #232f3e !important; 60 | } 61 | 62 | .screenshot { 63 | style: "float: left"; 64 | margin: 10px; 65 | border: 1px solid lightgrey; 66 | } -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_Full-Color -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB_REV.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_White-Color -------------------------------------------------------------------------------- /docs/images/AWS_logo_RGB_WHT.svg: -------------------------------------------------------------------------------- 1 | AWS-Logo_White -------------------------------------------------------------------------------- /docs/images/aws-genomics-workflows-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/aws-genomics-workflows-banner.png -------------------------------------------------------------------------------- /docs/images/cloudformation-launch-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/cloudformation-launch-stack.png -------------------------------------------------------------------------------- /docs/images/custom-deploy-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/custom-deploy-0.png -------------------------------------------------------------------------------- /docs/images/custom-deploy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/custom-deploy-1.png -------------------------------------------------------------------------------- /docs/images/genomics-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/genomics-workflow.png -------------------------------------------------------------------------------- /docs/images/nextflow-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/nextflow-0.png -------------------------------------------------------------------------------- /docs/images/root-vpc-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-1.png -------------------------------------------------------------------------------- /docs/images/root-vpc-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-2.png -------------------------------------------------------------------------------- /docs/images/root-vpc-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-3.png -------------------------------------------------------------------------------- /docs/images/root-vpc-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-4.png -------------------------------------------------------------------------------- /docs/images/root-vpc-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/images/root-vpc-5.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS 2 | 3 | {{ deprecation_notice() }} 4 | 5 | ![banner](images/aws-genomics-workflows-banner.png) 6 | 7 | ## Introduction 8 | 9 | Welcome! 10 | 11 | This guide walks through how to use [Amazon Web Services](https://aws.amazon.com) ([AWS](https://aws.amazon.com)), such as [Amazon S3](https://aws.amazon.com/s3) and [AWS Batch](https://aws.amazon.com/batch), to run large scale genomics analyses. 12 | 13 | Here you will learn how to: 14 | 15 | 1. Use S3 buckets to stage large genomics datasets as inputs and outputs from analysis pipelines 16 | 2. Create job queues in AWS Batch to use for scalable parallel job execution 17 | 3. Orchestrate individual jobs into analysis workflows using native AWS services like [AWS Step Functions](https://aws.amazon.com/step-functions) and 3rd party workflow engines 18 | 19 | If you're impatient and want to get something up and running immediately, head 20 | straight to the [Quick Start](quick-start) section. Otherwise, continue on for the full details. 21 | 22 | ## Prerequisites 23 | 24 | Throughout this guide we'll assume that you: 25 | 26 | 1. Are familiar with the Linux command line 27 | 2. Can use SSH to access a Linux server 28 | 3. Have access to an AWS account 29 | 30 | If you are completely new to AWS, we **highly recommend** going through the following [AWS 10-Minute Tutorials](https://aws.amazon.com/getting-started/tutorials/) that will demonstrate the basics of AWS, as well as set up your development machine for working with AWS. 31 | 32 | 1. **[Launch a Linux Virtual Machine](https://aws.amazon.com/getting-started/tutorials/launch-a-virtual-machine/)** - A tutorial which walks users through the process of starting a host on AWS, and configuring your own computer to connect over SSH. 33 | 2. **[Batch upload files to the cloud](https://aws.amazon.com/getting-started/tutorials/backup-to-s3-cli/)** - A tutorial on using the AWS Command Line Interface (CLI) to access Amazon S3. 34 | 35 | ### AWS Account Access 36 | 37 | AWS has many services that can be used for genomics. 
Here, we will build core architecture with [AWS Batch](https://aws.amazon.com/batch), a managed service that is built on top of other AWS services, such as [Amazon EC2](https://aws.amazon.com/ec2) and [Amazon Elastic Container Service (ECS)](https://aws.amazon.com/ecs). Along the way, we'll leverage some advanced capabilities that need escalated (administrative) privileges to implement. For example, you will need to be able to create [Roles](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) via AWS [Identity and Access Management (IAM)](https://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html), a service that helps you control who is authenticated (signed in) and authorized (has permissions) to use AWS resources. 38 | 39 | !!! tip 40 | We **strongly** recommend following the [IAM Security Best Practices](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) for securing your root AWS account and IAM users. 41 | 42 | !!! note 43 | If you are using an institutional account, it is likely you do not have administrative privileges, i.e. the IAM [AdministratorAccess](https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_managed-vs-inline.html) managed policy is not attached to your IAM User or Role, and you won't be able to attach it yourself. 44 | 45 | If this is the case, you will need to work with your account administrator to get things set up for you. Refer them to this guide, and have them provide you with an [AWS Batch Job Queue ARN](https://docs.aws.amazon.com/batch/latest/userguide/job_queues.html), and an [Amazon S3 Bucket](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html) that you can write results to. 46 | 47 | ## Contribution 48 | 49 | This site is a living document, created for and by the genomics community at AWS and around the world. We encourage you to contribute new content and make improvements to existing content via pull request to the [GitHub repo](https://github.com/aws-samples/aws-genomics-workflows/) that hosts the source code for this site. 
50 | -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen1.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen2.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen3.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen4.png -------------------------------------------------------------------------------- /docs/install-cromwell/images/screen5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/install-cromwell/images/screen5.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ClusterDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ClusterDashboard.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot1.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot1a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot1a.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/ScreenShot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot2.png -------------------------------------------------------------------------------- 
/docs/orchestration/cost-effective-workflows/images/ScreenShot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/ScreenShot3.png -------------------------------------------------------------------------------- /docs/orchestration/cost-effective-workflows/images/TaskDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cost-effective-workflows/images/TaskDashboard.png -------------------------------------------------------------------------------- /docs/orchestration/cromwell/cromwell-trouble-shooting.md: -------------------------------------------------------------------------------- 1 | # Cromwell Troubleshooting 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The following are some common errors that we have seen, along with suggested solutions. 6 | 7 | ## S3 Access Denied (403) 8 | ### Possible Cause(s) 9 | A 403 error from S3 indicates that Cromwell is trying to access an S3 object 10 | that it doesn't have permission to access. 11 | Following the principle of "least access", Cromwell uses an IAM EC2 instance role 12 | that grants it read and write access to the S3 bucket you specified in the 13 | CloudFormation deployment and read-only access to the `gatk-test-data` 14 | and `broad-references` S3 buckets. 15 | If your workflow references other S3 objects (**even ones in your account**), 16 | you will need to allow this via changes to the IAM role. Similarly, if a step in 17 | your workflow attempts to write to another bucket, you will need to add 18 | the appropriate permissions. 19 | 20 | ### Suggested Solution(s) 21 | 22 | * Add read access to additional buckets by attaching a policy to the Cromwell 23 | server's IAM EC2 instance role with content similar to: 24 | 25 | ```JSON 26 | { 27 | "Version": "2012-10-17", 28 | "Statement": [ 29 | { 30 | "Effect": "Allow", 31 | "Action": [ 32 | "s3:GetObject", 33 | "s3:ListBucket" 34 | ], 35 | "Resource": [ 36 | "arn:aws:s3:::bucket-a", 37 | "arn:aws:s3:::bucket-a/*", 38 | "arn:aws:s3:::another-bucket", 39 | "arn:aws:s3:::another-bucket/*" 40 | ] 41 | } 42 | ] 43 | } 44 | ``` 45 | The exact name of the role will be unique and generated by CloudFormation; 46 | however, it will contain the words "CromwellServer", and it will be the role attached 47 | to the EC2 instance running the Cromwell server. 48 | 49 | ## S3 File Not Found (404) 50 | 51 | ### Possible Cause(s) 52 | * A file required by the workflow cannot be found at the 53 | specified S3 path. Your workflow inputs might have an incorrect path, OR an 54 | expected file was not created by the previous step. 55 | 56 | ### Suggested Solution(s) 57 | * Check the paths of your inputs and confirm that the expected file exists at each path. 58 | * If the file name is something like `-rc.txt`, the previous task 59 | failed before it was able to write out the result code. Inspect the `stderr.txt` 60 | and `stdout.txt` of the previous step for possible reasons. 61 | 62 | ## Cromwell Server OutOfMemory errors 63 | 64 | ### Possible Cause(s) 65 | 66 | * Out of memory errors on the Cromwell Server are typically the result of the JVM running 67 | out of memory while attempting to keep track of multiple workflows or workflows with very 68 | large scatter steps.
69 | 70 | ### Suggested Solution(s) 71 | 72 | * Consider upgrading the server instance type to one with more RAM. 73 | * Investigate tuning [Cromwell's `job-control` limits](https://github.com/broadinstitute/cromwell/blob/9249537fd094c6979b0c64e99fcc90d48c861487/core/src/main/resources/reference.conf#L543-L572) 74 | to find a configuration that appropriately restricts the number of queued Akka messages. 75 | * Consider increasing the maximum instance RAM available to the JVM. Our CloudFormation templates 76 | set this to 85% (`-XX:MaxRAMPercentage=85.0`), allowing some headroom for the OS. 77 | On larger instance types you may be able to increase this further. 78 | * Ensure you are *not* using an in-memory database on the server instance. Our CloudFormation templates configure 79 | a separate Aurora MySQL cluster to avoid this. 80 | 81 | ## Cromwell Task (Container) OutOfMemory errors 82 | 83 | ### Possible Cause(s) 84 | 85 | * Individual tasks from a workflow run in Docker containers on AWS Batch. If those containers 86 | have insufficient RAM for the task, they can fail. 87 | * Some older applications (including older versions of the JVM) do not always respect the memory 88 | limits imposed by the container and may assume they have resources they cannot actually use. 89 | 90 | ### Suggested Solution(s) 91 | 92 | * Assign more memory to the task in the `runtime: {}` stanza of the WDL or, if the task application 93 | allows, use command line or configuration parameters to appropriately limit memory. 94 | * For tasks executed by the JVM, investigate the `-Xmx` and `-XX:MaxRAMPercentage` parameters. 95 | 96 | ## Cromwell-submitted AWS Batch jobs hang in 'Runnable' state 97 | 98 | ### Possible Cause(s) 99 | * The resources requested by the task exceed those of the largest instance type available in your AWS Batch Compute Environment 100 | * Batch worker EC2 instances are not able to join the Compute Environment's ECS cluster 101 | 102 | ### Suggested Solution(s) 103 | * Reduce the resources required by your task to less than the maximum CPU and memory of the largest instance type allowed 104 | in your Batch Compute Environment. 105 | * In your EC2 console, determine whether any gwf-core workers have started. If they have, ensure they have a route to the 106 | internet (for example, does your subnet have a NAT gateway?). Worker nodes require access to the internet so that they can 107 | download required dependencies at startup time. If this process fails, Docker will not start, the 108 | ECS agent will not run, and the Systems Manager agent will not run. In addition, the node will not be able to 109 | communicate with the AWS Batch service.
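If jobs remain in 'Runnable' and you suspect that workers are failing to register with the Compute Environment's ECS cluster, the commands below are one way to check from the CLI. This is a sketch only: the cluster ARN and instance ID are placeholders that you will need to substitute with values from your own account.

```bash
# Find the ECS cluster behind each Batch compute environment
aws batch describe-compute-environments \
    --query 'computeEnvironments[].[computeEnvironmentName,ecsClusterArn,status]' \
    --output table

# Check whether any container instances have registered with that cluster
# (replace the ARN with the ecsClusterArn returned above)
aws ecs list-container-instances \
    --cluster arn:aws:ecs:us-east-1:111122223333:cluster/EXAMPLE-CLUSTER

# If instances are running in EC2 but none are registered, inspect the
# cloud-init and ECS agent logs on a worker, e.g. via SSM Session Manager
aws ssm start-session --target i-0123456789abcdef0
```

An empty container instance list while workers are running in EC2 usually points to the missing internet route or failed provisioning described above.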
-------------------------------------------------------------------------------- /docs/orchestration/cromwell/images/cromwell-on-aws_infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/cromwell/images/cromwell-on-aws_infrastructure.png -------------------------------------------------------------------------------- /docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.png -------------------------------------------------------------------------------- /docs/orchestration/nextflow/images/nextflow-on-aws-infrastructure.xml: -------------------------------------------------------------------------------- 1 | 7Zxfd6MoFMA/TR6nR0RN8tim7U737J85m9nT2aceoiRhq5KD2KTz6RciGgXTpInaduv0oXKFC3J/XuBepwM4iTa/MLRa/k4DHA5sK9gM4PXAtoHljsQvKXnOJMMxyAQLRgJVaSeYkp84b6mkKQlwUqnIKQ05WVWFPo1j7POKDDFG19VqcxpWe12hBTYEUx+FpvSeBHyppMCydje+YrJYqq5HrroRobyyEiRLFNB1SQRvBnDCKOXZVbSZ4FBOXj4vWbvbPXeLgTEc86MaOFmLJxSm6uH+wBs+D8WgbOs7Sh7Fr1/pLFHD5c/5HNCUhyTGk2KKrQG8WjAUENH1hIaUCVlMY1H9asmjUJSAuFwvCcfTFfKlmrXAQ8jmNObKyMDOy6onqTXhjD7iXOfAhiMBzNiTNUkYan09YcaJMNVlSBaxEHMqu0Cq5IvBYab6qNMYoGSJAzXYRIyTxIvvUsU1FAI1WaILvNk746Cwo3gBMI0wZ8+iimrg5Cwo9uHIzcrrEkkjVWdZgghAJUSK3kWhe2dgcaFsvMfe9kv2/opRkNm7N3dL5gZw2KG5TWtf3k+F4Apxf3mcjbWZEz+3sq99tjeMN9/+04xXVlSYwCqZ8zc0w+E3mhBOqDRkRIIgrLP3jHJOozqTv4JCS6OwBIMiQfjplbwTbRZySbtA68S5WDEapD6/8+UQr1Ysu6jWmW1nuhGUIKyiNDZJqgXJagAkaICk1gYBC0cCGtYNTUUDjaaCsqNoKqA50nuUUDrkvFCyyp52TjZyHPXkMJzQlPlYoSOKdez4+dw+gGYIcjVn5ABz7fFqEPIaIMjcaAxsL+TSZORJXC7k5W4tym4JpaW7NQ1K+NW36KFsDUq7HSiBZ/q1tqB0DSinUC6Pqf+IubgAPUCNAzTbTu7DmvDlA539K/QmzYBkO1WQbKs77zY8AJLdg/RxQSr28B2ANDJAuosTjmIfH3kE7xF6BUKkmNtmDnvDt9tfjc2jXoR+ike2rZvJX/1Zr4OzHvZZMxx5Gkf20HRArR31ANjrgnoP1JoHascBdbmXBmaMwOAFBwuczyRlfEkXNEbhzU5aNgOOg0sZrJfGDKn/KEXhbFvObbrFBzGe11NIiZa3JMz1GIC5jnvlOdImNI2DgqRstHKIL8++eKKtYSt+V4xigVUtp95G5Yhf/qIyHCJOnqo91llBqftGScx39h5amr1dzZDZSFWrch5AU+SAqiLXGVYVZc9nKBITj55L1VayQrJ/wJ4e/dbzEwfqAxX02jGZjWBHaDHhx0FrngA/FrQCTfb8o1z4RxYuxNH2VKJtk+gsbdAJ0kBzYR7QSDwWafuQoj1In0KRZ26++jj7/yXODh1zF9/e7ss8AH56h5S/XhWP1N0i+wVo8QBvfOIq++WQouZcUv6BQYmje8oeVaI3j5bP2Aux9Ws8J3HmGPZW78PtLZ8PAuqnEY6bCm3BN4yR2ubBsjHXlrusA54t94BbYWdHhPxdLPsvtzP3lW+xi6DUeHSa93IOKWrQe9kfHhW8IfxH6TpbBYdnrII1+/LSN2NtY+TpngO6p2Gk78vd4blHzXNAqwtaGCth9q3DkSnpu3iVynFZcly29WfKs3K/hL7REprlhtpZP7tMDdnmKbMxp3js0WCfVzzxrNC4j3RqfCTszEcCLVrlnuwjDylqKBznaPFDZwRf3gK8XP/scFxNHv2DI37ijhGaHA+7w9hraMdoH1LU4I6xxbjJuz5c1JAy7owUPVTm6GmDY0nRExnOkZGRc/MPjortHZt/0PIV5zs8M23/d9J/TNvCLjBNcEPZeY15ALrLsUIzhLL9zxrWNJ1FJEm2sboenYbRYWn84NMoQnHQDEHgLRFqMbTyjtP0+ZtTORl0t05q2/lT0/Q6OIai5jZU8HN+zlH3PQfc8zb3oGQhkYNrUv/p6gdYlFztEyA734J3sSi1+BlOV/H+Iu994Q5KmW9wsiuCNcGsDj/EsZtyRYcUNeiK3kGE9C32NnWgeJ9mzXptDAAOGz3Ti+Lu7zVk1Xd/9QLe/Ac= -------------------------------------------------------------------------------- /docs/orchestration/nextflow/nextflow-trouble-shooting.md: -------------------------------------------------------------------------------- 1 | # Nextflow Troubleshooting 2 | 3 | {{ deprecation_notice() }} 4 | 5 | The following are some common errors that we have seen and suggested solutions 6 | 7 | ## Job Logs say there is an error in AWS CLI while loading shared 
libraries 8 | ### Possible Cause(s) 9 | Nextflow on AWS Batch relies on the process containers being able to use the AWS CLI (which is mounted from the container host). 10 | Very minimal container images such as Alpine do not contain the `glibc` libraries needed by the AWS CLI. 11 | 12 | ### Suggested Solution(s) 13 | 14 | * Modify your image to include or mount these dependencies. 15 | * Use an image (or build from a base) that already contains these, such as `ubuntu:latest`. 16 | 17 | ## AWS credentials not working when set in the environment 18 | ### Possible Cause(s) 19 | You are using a local run of Nextflow with temporary federated or IAM role credentials that use the AWS_SESSION_TOKEN in addition to AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. Nextflow does not look for the AWS_SESSION_TOKEN environment variable, as detailed in [nextflow/issues/2839](https://github.com/nextflow-io/nextflow/issues/2839). 20 | 21 | ### Suggested Solution(s) 22 | * Instead of using local credentials, consider using an IAM role associated with the EC2 instance or ECS container that the Nextflow binary runs from. This does not require setting any local credentials and removes the need to update a session token. 23 | * If you are just using Nextflow locally for testing purposes, you can set credentials in a local `nextflow.config`, which does support a session token: 24 | ``` 25 | aws { 26 | accessKey = 'XXXXXXXXXXXXXXXX' 27 | secretKey = 'XXXXXXXXXXXXXXXX' 28 | sessionToken = 'XXXXXXXXXXXXXXX' 29 | } 30 | ``` 31 | * ***N.B.*** If you set the `sessionToken` in the Nextflow config, it will expire and will need to be updated. The expiry time will depend on how credential generation is configured within your account. 32 | 33 | ## Container start errors 34 | ``` 35 | CannotStartContainerError: Error response from daemon: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: exec: "/usr/local/env-execute": stat /usr/local/env-execute: no such file or directory: un 36 | ``` 37 | ### Possible Cause(s) 38 | Nextflow on AWS Batch relies on the process containers being able to use a number of scripts that are mounted to the container. If references to these are wrong or the scripts do not exist, then the tasks will not start. 39 | 40 | ### Suggested Solution(s) 41 | * If using the provided image setup with no changes, check that the path specified for the AWS CLI in your `nextflow.config` is set to 42 | ``` 43 | aws.batch.cliPath = '/opt/aws-cli/bin/aws' 44 | ``` 45 | 46 | * Check that the target S3 bucket created in the set-up has the following path: `bucket-name/-ecs-additions/SourceStag/ ` and that content is present. 47 | * This location should contain a zip file that has the following in it: 48 | 49 | ``` 50 | . 51 | ├── awscli-shim.sh 52 | ├── ecs-additions-common.sh 53 | ├── ecs-additions-cromwell.sh 54 | ├── ecs-additions-nextflow.sh 55 | ├── ecs-additions-step-functions.sh 56 | ├── ecs-logs-collector.sh 57 | ├── fetch_and_run.sh 58 | ├── get-amazon-ebs-autoscale.sh 59 | └── provision.sh 60 | ``` 61 | * If this is missing, check that `-ecs-additions` exists and ran successfully in AWS CodePipeline, and rerun it if failures are present.
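As a quick check of the last two points, you can confirm the CLI path in your config and list the artifacts that worker nodes download at startup. This is a sketch; the bucket name is a placeholder for the bucket created by your deployment.

```bash
# Confirm the cliPath setting in your local nextflow.config
grep -n "cliPath" nextflow.config

# List the ecs-additions artifacts in the deployment bucket
# (replace YOUR-BUCKET-NAME with the bucket created during set-up)
aws s3 ls s3://YOUR-BUCKET-NAME/ --recursive | grep ecs-additions
```

If nothing is listed, re-run the failed stage in AWS CodePipeline as described above.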
62 | -------------------------------------------------------------------------------- /docs/orchestration/orchestration-intro.md: -------------------------------------------------------------------------------- 1 | # Workflow Orchestration 2 | 3 | {{ deprecation_notice() }} 4 | 5 | Having deployed a way to execute individual tasks via AWS Batch, we turn to 6 | orchestration of complete workflows. 7 | 8 | In order to process data, we will need to handle the cases for serial and parallel task execution, and retry logic when a task fails. 9 | 10 | The logic for workflows should live outside of the code for any individual task. There are a couple of options that researchers can use to define and execute repeatable data analysis pipelines on AWS Batch: 11 | 12 | 1. [AWS Step Functions](./step-functions/step-functions-overview.md), a native AWS service for workflow orchestration. 13 | 14 | 2. 3rd party alternatives: 15 | 16 | * [Cromwell](./cromwell/cromwell-overview.md), a workflow execution system 17 | from the [Broad Institute](https://www.broadinstitute.org/) 18 | 19 | * [Nextflow](./nextflow/nextflow-overview.md), a reactive workflow framework and domain specific language (DSL) from the [Comparative Bioinformatics group](https://www.crg.eu/en/programmes-groups/notredame-lab) at the Barcelona [Centre for Genomic Regulation (CRG)](http://www.crg.eu/) 20 | 21 | !!! help 22 | There are many more 3rd party alternatives. We are actively seeking out 23 | help to document them here! 24 | -------------------------------------------------------------------------------- /docs/orchestration/step-functions/files/example-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment":"A simple example that submits a Job to AWS Batch", 3 | "StartAt":"RunIsaacJob", 4 | "States":{ 5 | "RunIsaacJob":{ 6 | "Type":"Task", 7 | "Resource":"arn:aws:states:::batch:submitJob.sync", 8 | "Parameters":{ 9 | "JobDefinition":"Isaac", 10 | "JobName.$":"$.isaac.JobName", 11 | "JobQueue":"HighPriority", 12 | "Parameters.$": "$.isaac" 13 | }, 14 | "TimeoutSeconds": 1800, 15 | "HeartbeatSeconds": 60, 16 | "Next":"Parallel", 17 | "InputPath":"$", 18 | "ResultPath":"$.status", 19 | "Retry" : [ 20 | { 21 | "ErrorEquals": [ "States.Timeout" ], 22 | "IntervalSeconds": 3, 23 | "MaxAttempts": 2, 24 | "BackoffRate": 1.5 25 | } 26 | ] 27 | }, 28 | "Parallel":{ 29 | "Type":"Parallel", 30 | "Next":"FinalState", 31 | "Branches":[ 32 | { 33 | "StartAt":"RunStrelkaJob", 34 | "States":{ 35 | "RunStrelkaJob":{ 36 | "Type":"Task", 37 | "Resource":"arn:aws:states:::batch:submitJob.sync", 38 | "Parameters":{ 39 | "JobDefinition":"Strelka", 40 | "JobName.$":"$.strelka.JobName", 41 | "JobQueue":"HighPriority", 42 | "Parameters.$": "$.strelka" 43 | }, 44 | "TimeoutSeconds": 1800, 45 | "HeartbeatSeconds": 60, 46 | "Next":"RunSnpEffJob", 47 | "InputPath":"$", 48 | "ResultPath":"$.status", 49 | "Retry" : [ 50 | { 51 | "ErrorEquals": [ "States.Timeout" ], 52 | "IntervalSeconds": 3, 53 | "MaxAttempts": 2, 54 | "BackoffRate": 1.5 55 | } 56 | ] 57 | }, 58 | "RunSnpEffJob":{ 59 | "Type":"Task", 60 | "Resource":"arn:aws:states:::batch:submitJob.sync", 61 | "Parameters":{ 62 | "JobDefinition":"SNPEff", 63 | "JobName.$":"$.snpeff.JobName", 64 | "JobQueue":"HighPriority", 65 | "Parameters.$": "$.snpeff" 66 | }, 67 | "TimeoutSeconds": 1800, 68 | "HeartbeatSeconds": 60, 69 | "Retry" : [ 70 | { 71 | "ErrorEquals": [ "States.Timeout" ], 72 | "IntervalSeconds": 3, 73 | "MaxAttempts": 2, 74 | "BackoffRate": 
1.5 75 | } 76 | ], 77 | "End":true 78 | } 79 | } 80 | }, 81 | { 82 | "StartAt":"RunSamtoolsStatsJob", 83 | "States":{ 84 | "RunSamtoolsStatsJob":{ 85 | "Type":"Task", 86 | "Resource":"arn:aws:states:::batch:submitJob.sync", 87 | "Parameters":{ 88 | "JobDefinition":"SamtoolsStats", 89 | "JobName.$":"$.samtools.JobName", 90 | "JobQueue":"HighPriority", 91 | "Parameters.$": "$.samtools" 92 | }, 93 | "TimeoutSeconds": 1800, 94 | "HeartbeatSeconds": 60, 95 | "End":true, 96 | "Retry" : [ 97 | { 98 | "ErrorEquals": [ "States.Timeout" ], 99 | "IntervalSeconds": 3, 100 | "MaxAttempts": 2, 101 | "BackoffRate": 1.5 102 | } 103 | ] 104 | } 105 | } 106 | } 107 | ] 108 | }, 109 | "FinalState":{ 110 | "Type":"Pass", 111 | "End":true 112 | } 113 | } 114 | } -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/aws-sfn-genomics-workflow-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/aws-sfn-genomics-workflow-arch.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/cfn-stack-outputs-statemachineinput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/cfn-stack-outputs-statemachineinput.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/cfn-stack-outputs-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/cfn-stack-outputs-tab.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/example-state-machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/example-state-machine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-batch-job-snippet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-batch-job-snippet.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-execution-inprogress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-execution-inprogress.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-start-execution-dialog.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-start-execution-dialog.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-start-execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-start-execution.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-console-statemachine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-console-statemachine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/sfn-example-mapping-state-machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/sfn-example-mapping-state-machine.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/images/step-functions-structures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-genomics-workflows/d86b056afd4294b1df7449c04abf7e1fd7c2275d/docs/orchestration/step-functions/images/step-functions-structures.png -------------------------------------------------------------------------------- /docs/orchestration/step-functions/step-functions-examples.md: -------------------------------------------------------------------------------- 1 | # Step Functions Workflow Examples 2 | 3 | THIS IS A STUB 4 | 5 | ![Example Workflow](./images/example-state-machine.png) 6 | 7 | this was created from [this file](./files/example-state-machine.json). 
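One way to exercise the deployed state machine is to start an execution from the AWS CLI. The commands below are illustrative only: the state machine ARN is a placeholder, and the input keys simply mirror the `isaac`, `strelka`, `snpeff`, and `samtools` parameters referenced by the example definition linked above.

```bash
# Start an execution of the example state machine (ARN is a placeholder)
aws stepfunctions start-execution \
    --state-machine-arn arn:aws:states:us-east-1:111122223333:stateMachine:example-genomics-workflow \
    --input '{
        "isaac":    {"JobName": "isaac-sample1"},
        "strelka":  {"JobName": "strelka-sample1"},
        "snpeff":   {"JobName": "snpeff-sample1"},
        "samtools": {"JobName": "samtools-sample1"}
    }'

# Check on the execution using the executionArn returned above
aws stepfunctions describe-execution --execution-arn <executionArn>
```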
-------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: mkdocs 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.6.6 6 | - pip 7 | - pip: 8 | - cfn-lint 9 | - fontawesome-markdown==0.2.6 10 | - mkdocs==1.0.4 11 | - mkdocs-macros-plugin==0.2.4 12 | - mkdocs-markdownextradata-plugin==0.0.5 13 | - mkdocs-material==3.1.0 14 | - pymdown-extensions==6.0 15 | prefix: /Users/pwyming/anaconda3/envs/mkdocs 16 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | defines macros for documents using mkdocs-macros-plugin 3 | """ 4 | 5 | from textwrap import dedent 6 | from functools import wraps 7 | 8 | def dedented(f): 9 | @wraps(f) 10 | def wrapper(*args, **kwargs): 11 | return dedent(f(*args, **kwargs).strip()) 12 | return wrapper 13 | 14 | def declare_variables(variables, macro): 15 | 16 | _artifacts = variables['artifacts'] 17 | 18 | @macro 19 | @dedented 20 | def cfn_button(name, template, enabled=True): 21 | """ 22 | create an cloudformation launch button 23 | """ 24 | s3 = _artifacts['s3'] 25 | 26 | if template.lower().startswith('http'): 27 | template_url = template 28 | else: 29 | s3['object'] = "/".join( 30 | filter(None, [s3.get('prefix'), 'latest', 'templates', template]) 31 | ) 32 | 33 | template_url = "https://{bucket}.s3.amazonaws.com/{object}".format(**s3) 34 | 35 | cfn_url = "".join([ 36 | "https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=", 37 | name, 38 | "&templateURL=", 39 | template_url, 40 | ]) 41 | 42 | img_src = "/" + "/".join( 43 | filter(None, [s3.get('prefix'), 'images/cloudformation-launch-stack.png']) 44 | ) 45 | 46 | html = 'play_arrow' 47 | if not enabled: 48 | html = 'play_arrow' 49 | 50 | return html.format(name=name, img=img_src, url=cfn_url) 51 | 52 | @macro 53 | @dedented 54 | def download_button(path, icon="cloud_download"): 55 | """ 56 | create a download button 57 | """ 58 | repo_url = variables['repo_url'] 59 | s3 = _artifacts['s3'] 60 | 61 | if path.lower().startswith('http'): 62 | src_url = path 63 | else: 64 | # s3['object'] = "/".join( 65 | # filter(None, [s3.get('prefix'), path]) 66 | # ) 67 | 68 | # src_url = "https://s3.amazonaws.com/{bucket}/{object}".format(**s3) 69 | if repo_url.endswith("/"): 70 | repo_url = repo_url[:-1] 71 | 72 | if path.startswith("/"): 73 | path = path[1:] 74 | 75 | src_url = f"{repo_url}/blob/master/src/{path}" 76 | 77 | return """ 78 | {icon} 79 | """.format(icon=icon, url=src_url) 80 | 81 | @macro 82 | @dedented 83 | def cfn_stack_row(name, stack_name, template, description, enable_cfn_button=True): 84 | if template.lower().startswith('http'): 85 | stack_url = template 86 | else: 87 | stack_url = "templates/" + template 88 | 89 | return """ 90 | | {name} | {description} | {download_button} | {cfn_button} | 91 | """.format( 92 | name=name, 93 | stack_name=stack_name, 94 | download_button=download_button(stack_url), 95 | cfn_button=cfn_button(stack_name, template, enabled=enable_cfn_button), 96 | description=description 97 | ) 98 | 99 | @macro 100 | @dedented 101 | def deprecation_notice(): 102 | return """ 103 | !!! error "DEPRECATION NOTICE" 104 | This site and related code are no longer actively maintained. 105 | 106 | This site will be disabled and the underlying Github repository will be **archived on 2023-07-31**. 
This allows all code and assets presented here to remain publicly available for historical reference purposes only. 107 | 108 | For more up to date solutions to running Genomics workflows on AWS checkout: 109 | 110 | - [Amazon Omics](https://aws.amazon.com/omics/) - a fully managed service for storing, processing, and querying genomic, transcriptomic, and other omics data into insights. [Omics Workflows](https://docs.aws.amazon.com/omics/latest/dev/workflows.html) provides fully managed execution of pre-packaged [Ready2Run](https://docs.aws.amazon.com/omics/latest/dev/service-workflows.html) workflows or private workflows you create using WDL or Nextflow. 111 | - [Amazon Genomics CLI](https://aws.amazon.com/genomics-cli/) - an open source tool that automates deploying and running workflow engines in AWS. AGC uses the same architectural patterns described here (i.e. operating workflow engines with AWS Batch). It provides support for running WDL, Nextflow, Snakemake, and CWL based workflows. 112 | """ -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Genomics Workflows on AWS 2 | nav: 3 | - Overview: index.md 4 | - Disclaimer: disclaimer.md 5 | - Quick Start: quick-start.md 6 | - Core Environment: 7 | - Introduction: core-env/introduction.md 8 | - Data Storage: core-env/create-s3-bucket.md 9 | - Permissions: core-env/create-iam-roles.md 10 | - Compute Resources: core-env/create-custom-compute-resources.md 11 | - AWS Batch: core-env/setup-aws-batch.md 12 | - Customized Deployment: core-env/custom-deploy.md 13 | - Building a Custom Distribution: core-env/build-custom-distribution.md 14 | # - Containerized Tooling: 15 | # - Introduction: containers/container-introduction.md 16 | # - Examples: containers/container-examples.md 17 | - Workflow Orchestration: 18 | - Introduction: orchestration/orchestration-intro.md 19 | - AWS Step Functions: 20 | - Overview: orchestration/step-functions/step-functions-overview.md 21 | # - Examples: orchestration/step-functions/step-functions-examples.md 22 | - Cromwell: 23 | - Overview: orchestration/cromwell/cromwell-overview.md 24 | - Examples: orchestration/cromwell/cromwell-examples.md 25 | - Trouble Shooting: orchestration/cromwell/cromwell-trouble-shooting.md 26 | - Nextflow: 27 | - Overview: orchestration/nextflow/nextflow-overview.md 28 | # - Examples: orchestration/nextflow/nextflow-examples.md 29 | - Trouble Shooting: orchestration/nextflow/nextflow-trouble-shooting.md 30 | - Cost Effective Workflows: orchestration/cost-effective-workflows/cost-effective-workflows.md 31 | 32 | extra_css: [extra.css] 33 | theme: 34 | name: material 35 | logo: 'images/AWS_logo_RGB_REV.svg' 36 | palette: 37 | primary: blue grey 38 | accent: deep orange 39 | markdown_extensions: 40 | - admonition 41 | - pymdownx.emoji: 42 | emoji_generator: !!python/name:pymdownx.emoji.to_png 43 | 44 | repo_url: &repo_url https://github.com/aws-samples/aws-genomics-workflows/ 45 | repo_name: Contribute 46 | copyright: 2019 Amazon Web Services 47 | 48 | plugins: 49 | - search 50 | - macros 51 | 52 | extra: 53 | repo_url: *repo_url 54 | artifacts: 55 | s3: 56 | bucket: aws-genomics-workflows 57 | site: 58 | s3: 59 | bucket: docs.opendata.aws 60 | prefix: genomics-workflows 61 | 62 | use_directory_urls: false -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | mkdocs==1.3.0 2 | mkdocs-macros-plugin==0.2.4 3 | mkdocs-markdownextradata-plugin==0.0.5 4 | mkdocs-material==3.1.0 5 | pymdown-extensions==10.0 6 | jinja2==3.0.0 # https://github.com/mkdocs/mkdocs/issues/2799 7 | cfn-lint 8 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.pem 2 | _ignore 3 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | cdk.context.json -------------------------------------------------------------------------------- /src/aws-genomics-cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/app.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "accountID": "111111111111", 3 | "region": "us-west-2", 4 | "projectName": "genomics", 5 | "tags": [{ 6 | "name": "Environment", 7 | "value": "production" 8 | }, 9 | { 10 | "name": "Project", 11 | "value": "genomics-pipeline" 12 | } 13 | ], 14 | "S3": { 15 | "existingBucket": true, 16 | "bucketName": "YOUR-BUCKET-NAME" 17 | }, 18 | "VPC": { 19 | "createVPC": true, 20 | "VPCName": "genomics-vpc", 21 | "maxAZs": 2, 22 | "cidr": "10.0.0.0/16", 23 | "cidrMask": 24 24 | }, 25 | "batch": { 26 | "defaultVolumeSize": 100, 27 | "spotMaxVCPUs": 128, 28 | "onDemendMaxVCPUs": 128, 29 | "instanceTypes": [ 30 | "c4.large", 31 | "c4.xlarge", 32 | "c4.2xlarge", 33 | "c4.4xlarge", 34 | "c4.8xlarge", 35 | "c5.large", 36 | "c5.xlarge", 37 | "c5.2xlarge", 38 | "c5.4xlarge", 39 | "c5.9xlarge", 40 | "c5.12xlarge", 41 | "c5.18xlarge", 42 | "c5.24xlarge" 43 | ] 44 | }, 45 | "workflows": [{ 46 | "name": "variantCalling", 47 | "spot": true 48 | }] 49 | } 50 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/assets/genomics-policy-s3.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Deny", 6 | "Action": [ 7 | "s3:Delete*", 8 | "s3:PutBucket*" 9 | ], 10 | "Resource": [ 11 | "arn:aws:s3:::BUCKET_NAME" 12 | ] 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": [ 17 | "s3:ListBucket*" 18 | ], 19 | "Resource": [ 20 | "arn:aws:s3:::BUCKET_NAME" 21 | ] 22 | }, 23 | { 24 | "Effect": "Allow", 25 | "Action": [ 26 | "s3:*" 27 | ], 28 | "Resource": [ 29 | "arn:aws:s3:::BUCKET_NAME/*" 30 | ] 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/assets/launch_template_user_data.txt: -------------------------------------------------------------------------------- 1 | MIME-Version: 1.0 2 | Content-Type: multipart/mixed; boundary="==BOUNDARY==" 3 | 4 | --==BOUNDARY== 5 | Content-Type: text/cloud-config; charset="us-ascii" 6 | 7 | #cloud-config 8 | repo_update: true 9 | repo_upgrade: security 10 | 11 | packages: 12 | - jq 13 | - btrfs-progs 14 | - sed 15 | - git 16 | - 
amazon-ssm-agent 17 | - unzip 18 | - amazon-cloudwatch-agent 19 | 20 | write_files: 21 | - permissions: '0644' 22 | path: /opt/aws/amazon-cloudwatch-agent/etc/config.json 23 | content: | 24 | { 25 | "agent": { 26 | "logfile": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log" 27 | }, 28 | "logs": { 29 | "logs_collected": { 30 | "files": { 31 | "collect_list": [ 32 | { 33 | "file_path": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log", 34 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 35 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/amazon-cloudwatch-agent.log" 36 | }, 37 | { 38 | "file_path": "/var/log/cloud-init.log", 39 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 40 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init.log" 41 | }, 42 | { 43 | "file_path": "/var/log/cloud-init-output.log", 44 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 45 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init-output.log" 46 | }, 47 | { 48 | "file_path": "/var/log/ecs/ecs-init.log", 49 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 50 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-init.log" 51 | }, 52 | { 53 | "file_path": "/var/log/ecs/ecs-agent.log", 54 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 55 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-agent.log" 56 | }, 57 | { 58 | "file_path": "/var/log/ecs/ecs-volume-plugin.log", 59 | "log_group_name": "/aws/ecs/container-instance/${Namespace}", 60 | "log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-volume-plugin.log" 61 | } 62 | ] 63 | } 64 | } 65 | } 66 | } 67 | 68 | runcmd: 69 | 70 | # start the amazon-cloudwatch-agent 71 | - /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/etc/config.json 72 | 73 | # install aws-cli v2 and copy the static binary in an easy to find location for bind-mounts into containers 74 | - curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip" 75 | - unzip -q /tmp/awscliv2.zip -d /tmp 76 | - /tmp/aws/install -b /usr/bin 77 | 78 | # check that the aws-cli was actually installed. if not shutdown (terminate) the instance 79 | - command -v aws || shutdown -P now 80 | 81 | - mkdir -p /opt/aws-cli/bin 82 | - cp -a $(dirname $(find /usr/local/aws-cli -name 'aws' -type f))/. 
/opt/aws-cli/bin/ 83 | 84 | # set environment variables for provisioning 85 | - export GWFCORE_NAMESPACE=${Namespace} 86 | - export INSTALLED_ARTIFACTS_S3_ROOT_URL=$(aws ssm get-parameter --name /gwfcore/${Namespace}/installed-artifacts/s3-root-url --query 'Parameter.Value' --output text) 87 | 88 | # enable ecs spot instance draining 89 | - echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config 90 | 91 | # pull docker images only if missing 92 | - echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config 93 | 94 | - cd /opt 95 | - aws s3 sync $INSTALLED_ARTIFACTS_S3_ROOT_URL/ecs-additions ./ecs-additions 96 | - chmod a+x /opt/ecs-additions/provision.sh 97 | - /opt/ecs-additions/provision.sh 98 | 99 | --==BOUNDARY==-- -------------------------------------------------------------------------------- /src/aws-genomics-cdk/bin/aws-genomics-cdk.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import "source-map-support/register"; 3 | import * as cdk from "@aws-cdk/core"; 4 | import { AwsGenomicsCdkStack } from "../lib/aws-genomics-cdk-stack"; 5 | import * as config from "../app.config.json"; 6 | 7 | const env = { 8 | account: process.env.CDK_DEFAULT_ACCOUNT ?? config.accountID, 9 | region: process.env.CDK_DEFAULT_REGION ?? config.region, 10 | }; 11 | 12 | const app = new cdk.App(); 13 | const genomicsStack = new AwsGenomicsCdkStack( 14 | app, 15 | `${config.projectName}CdkStack`, 16 | { 17 | env: env, 18 | } 19 | ); 20 | 21 | for (let i = 0; i < config.tags.length; i++) { 22 | cdk.Tags.of(genomicsStack).add(config.tags[i].name, config.tags[i].value); 23 | } 24 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/aws-genomics-cdk.ts", 3 | "context": { 4 | "@aws-cdk/core:enableStackNameDuplicates": "true", 5 | "aws-cdk:enableDiffNoFail": "true", 6 | "@aws-cdk/core:stackRelativeExports": "true", 7 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, 8 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, 9 | "@aws-cdk/aws-kms:defaultKeyPolicies": true, 10 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, 11 | "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | DEFAULT_PROJECT_NAME="genomics" 5 | IMAGE_NAME=$1 6 | PROJECT_NAME="${2:-$DEFAULT_PROJECT_NAME}" 7 | DOCKER_FILE_PATH="./${IMAGE_NAME}/Dockerfile" 8 | REGISTRY="$CDK_DEFAULT_ACCOUNT.dkr.ecr.$CDK_DEFAULT_REGION.amazonaws.com" 9 | REPOSITORY_NAME="${PROJECT_NAME}/${IMAGE_NAME}" 10 | IMAGE_TAG=":latest" 11 | IMAGE_WITH_TAG="${IMAGE_NAME}${IMAGE_TAG}" 12 | REGISTRY_PATH="${REGISTRY}/${REPOSITORY_NAME}" 13 | REGISTRY_PATH_WITH_TAG="${REGISTRY}/${PROJECT_NAME}/${IMAGE_WITH_TAG}" 14 | 15 | 16 | if [ -z "${IMAGE_NAME}" ] 17 | then 18 | echo "Missing image name parameter." 19 | exit 1 20 | fi 21 | 22 | if [[ ! -f "${DOCKER_FILE_PATH}" ]] 23 | then 24 | echo "${DOCKER_FILE_PATH} does not exist on the filesystem." 25 | exit 1 26 | fi 27 | 28 | if [ -z "$CDK_DEFAULT_ACCOUNT" ] 29 | then 30 | echo "Missing CDK_DEFAULT_ACCOUNT environment variable." 
31 | exit 1 32 | fi 33 | 34 | if [ -z "$CDK_DEFAULT_REGION" ] 35 | then 36 | echo "Missing CDK_DEFAULT_REGION environment variable." 37 | exit 1 38 | fi 39 | 40 | 41 | echo "Docker Login to ECR" 42 | eval $(aws ecr get-login --no-include-email --region ${CDK_DEFAULT_REGION}) 43 | 44 | 45 | # Check if the repository exists in ECR and if not, create it 46 | REPO=`aws ecr describe-repositories | grep -o ${REGISTRY_PATH}` || true 47 | if [ "${REPO}" != "${REGISTRY_PATH}" ] 48 | then 49 | aws ecr create-repository --repository-name ${REPOSITORY_NAME} 50 | fi 51 | 52 | # build the base image 53 | docker build \ 54 | -t ${IMAGE_NAME} \ 55 | -f ${DOCKER_FILE_PATH} . 56 | 57 | # build the image with an AWS specific entrypoint 58 | docker build \ 59 | --build-arg BASE_IMAGE=${IMAGE_NAME} \ 60 | -t ${IMAGE_WITH_TAG} \ 61 | -f ./entry.dockerfile . 62 | 63 | 64 | # tag the image 65 | docker tag ${IMAGE_WITH_TAG} ${REGISTRY_PATH} 66 | 67 | 68 | # push the image to the registry 69 | docker push ${REGISTRY_PATH_WITH_TAG} -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/bwa/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG BWA_VERSION=0.7.17 4 | 5 | RUN apt-get update -y \ 6 | && apt-get install -y \ 7 | wget \ 8 | make \ 9 | gcc \ 10 | zlib1g-dev \ 11 | bzip2 12 | 13 | 14 | WORKDIR /opt/src 15 | RUN wget https://github.com/lh3/bwa/releases/download/v${BWA_VERSION}/bwa-${BWA_VERSION}.tar.bz2 \ 16 | && tar -xjvf bwa-*.tar.bz2 \ 17 | && cd bwa-* \ 18 | && make \ 19 | && cp bwa /opt/src 20 | 21 | 22 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 23 | 24 | RUN apt-get update -y \ 25 | && apt-get install -y \ 26 | wget \ 27 | make \ 28 | zlib1g \ 29 | bzip2 \ 30 | && apt-get clean 31 | 32 | WORKDIR /opt/bin 33 | COPY --from=build /opt/src/bwa . 
34 | 35 | ENV PATH=/opt/bin:$PATH 36 | 37 | WORKDIR /scratch 38 | 39 | ENTRYPOINT ["bwa"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/entry.dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | FROM ${BASE_IMAGE}:latest 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y gettext-base wget 6 | RUN apt-get clean 7 | 8 | ENV PATH=/opt/bin:$PATH 9 | 10 | COPY entrypoint.sh /opt/bin/entrypoint.sh 11 | RUN chmod +x /opt/bin/entrypoint.sh 12 | 13 | WORKDIR /scratch 14 | 15 | ENTRYPOINT ["entrypoint.sh"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/fastqc/Dockerfile: -------------------------------------------------------------------------------- 1 | # base image 2 | FROM ubuntu:xenial 3 | 4 | # metadata 5 | LABEL base.image="ubuntu:xenial" 6 | LABEL version="1" 7 | LABEL software="FASTQC" 8 | LABEL software.version="0.11.8" 9 | LABEL description="A quality control analysis tool for high throughput sequencing data" 10 | LABEL website="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" 11 | LABEL license="https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt" 12 | LABEL maintainer="Abigail Shockey" 13 | LABEL maintainer.email="abigail.shockey@slh.wisc.edu" 14 | 15 | RUN apt-get update && apt-get install -y \ 16 | unzip \ 17 | wget \ 18 | perl \ 19 | default-jre \ 20 | && apt-get clean && apt-get autoclean && rm -rf /var/lib/apt/lists/* 21 | 22 | RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip && \ 23 | unzip fastqc_v0.11.8.zip && \ 24 | rm fastqc_v0.11.8.zip && \ 25 | chmod +x FastQC/fastqc 26 | 27 | 28 | ENV PATH="${PATH}:/FastQC/" 29 | 30 | RUN mkdir /data 31 | WORKDIR /data 32 | 33 | ENTRYPOINT ["fastqc"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/gatk/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM broadinstitute/gatk:4.1.3.0 2 | 3 | ENTRYPOINT ["gatk"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/minimap2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 AS build 2 | 3 | ARG VERSION=2.17 4 | 5 | # metadata 6 | LABEL base.image="ubuntu:18.04" 7 | LABEL container.version="1" 8 | LABEL software="Minimap2" 9 | LABEL software.version="${VERSION}" 10 | LABEL description="versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database" 11 | LABEL website="https://github.com/lh3/minimap2" 12 | LABEL license="https://github.com/lh3/minimap2/blob/master/LICENSE.txt" 13 | LABEL maintainer="Kelsey Florek" 14 | LABEL maintainer.email="Kelsey.florek@slh.wisc.edu" 15 | 16 | # install dependeny tools 17 | RUN apt-get update && apt-get install -y python curl bzip2 && apt-get clean 18 | 19 | # download and extract minimap2 20 | WORKDIR /opt/bin 21 | RUN curl -L https://github.com/lh3/minimap2/releases/download/v2.17/minimap2-2.17_x64-linux.tar.bz2 | tar -jxvf - 22 | 23 | # add minimap2 to the path 24 | ENV PATH="${PATH}:/opt/bin/minimap2-2.17_x64-linux" 25 | 26 | WORKDIR /scratch 27 | 28 | ENTRYPOINT ["minimap2"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/picard/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM broadinstitute/picard 2 | 3 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/containers/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG VERSION=1.9 4 | 5 | # Metadata 6 | LABEL container.base.image="ubuntu:18.04" 7 | LABEL software.name="SAMtools" 8 | LABEL software.version=${VERSION} 9 | LABEL software.description="Utilities for the Sequence Alignment/Map (SAM/BAM/CRAM) formats" 10 | LABEL software.website="http://www.htslib.org" 11 | LABEL software.documentation="http://www.htslib.org/doc/samtools.html" 12 | LABEL software.license="MIT/Expat" 13 | LABEL tags="Genomics" 14 | 15 | # System and library dependencies 16 | RUN apt-get -y update && \ 17 | apt-get -y install \ 18 | autoconf \ 19 | automake \ 20 | make \ 21 | gcc \ 22 | perl \ 23 | zlib1g-dev \ 24 | libbz2-dev \ 25 | liblzma-dev \ 26 | libcurl4-gnutls-dev \ 27 | libssl-dev \ 28 | libncurses5-dev \ 29 | wget && \ 30 | apt-get clean 31 | 32 | # Application installation 33 | RUN wget -O /samtools-${VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${VERSION}/samtools-${VERSION}.tar.bz2 && \ 35 | tar xvjf /samtools-${VERSION}.tar.bz2 && rm /samtools-${VERSION}.tar.bz2 36 | 37 | WORKDIR /samtools-${VERSION} 38 | RUN ./configure && make 39 | 40 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 41 | COPY --from=build /samtools-*/samtools /usr/local/bin 42 | 43 | RUN apt-get -y update && \ 44 | apt-get -y install \ 45 | libcurl3-gnutls && \ 46 | apt-get clean 47 | 48 | ENTRYPOINT ["samtools"] -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/README.md: -------------------------------------------------------------------------------- 1 | # Bioinformatics tools examples 2 | 3 | After [deploying the CDK genomics pipeline project](GITHUB URL) you can test 4 | the genomics tools directly with AWS Batch or start a Step Functions pipeline. 5 | 6 | 7 | ### Testing bioinformatics tools using AWS Batch 8 | Create a file named batch-TOOL_NAME.json. 9 | ``` 10 | { 11 | "jobName": "", 12 | "jobQueue": "", 13 | "jobDefinition": "", 14 | "containerOverrides": { 15 | "vcpus": 1, 16 | "memory": 1000, 17 | "command": [""], 18 | "environment": [{ 19 | "name": "JOB_INPUTS", 20 | "value": "" 21 | }, 22 | { 23 | "name": "JOB_OUTPUTS", 24 | "value": "" 25 | }, 26 | { 27 | "name": "JOB_OUTPUT_PREFIX", 28 | "value": "" 29 | } 30 | ] 31 | } 32 | } 33 | 34 | ``` 35 | 36 | **jobName** (string) 37 | The name of the job. The first character must be alphanumeric, and up to 128 38 | letters (uppercase and lowercase), numbers, hyphens, and underscores are 39 | allowed. 40 | 41 | **jobQueue** (string) 42 | The [job queue](https://docs.aws.amazon.com/batch/latest/userguide/job_queues.html) 43 | into which the job is submitted. You can specify either the name or the Amazon 44 | Resource Name (ARN) of the queue. 45 | 46 | **jobDefinition** (string) 47 | The [job definition](https://docs.aws.amazon.com/batch/latest/userguide/job_definitions.html) 48 | used by this job. This value can be one of name, name:revision, or the Amazon 49 | Resource Name (ARN) for the job definition. If name is specified without 50 | a revision, then the latest active revision is used.
51 | 52 | **containerOverrides.vcpus** (integer, optional) 53 | The number of vCPUs to reserve for the container. This value overrides the 54 | value set in the job definition. 55 | 56 | **containerOverrides.memory** (integer, optional) 57 | The number of MiB of memory reserved for the job. This value overrides the 58 | value set in the job definition. 59 | 60 | **containerOverrides.command** (list) 61 | The command to send to the container, which overrides the default command from 62 | the Docker image or the job definition. 63 | 64 | **containerOverrides.environment** (list) 65 | The environment variables to send to the container. You can add new environment 66 | variables, which are added to the container at launch, or you can override the 67 | existing environment variables from the Docker image or the job definition. 68 | (structure) 69 | A key-value pair object. 70 | **name** (string) 71 | The name of the key-value pair. For environment variables, this is the name of 72 | the environment variable. 73 | **value** (string) 74 | The value of the key-value pair. For environment variables, this is the value 75 | of the environment variable. 76 | 77 | Example of a `batch-fastqc.json`: 78 | ``` 79 | { 80 | "jobName": "fastqc", 81 | "jobQueue": "genomics-default-queue", 82 | "jobDefinition": "genomics-fastqc:1", 83 | "containerOverrides": { 84 | "vcpus": 1, 85 | "memory": 1000, 86 | "command": ["fastqc *.gz"], 87 | "environment": [{ 88 | "name": "JOB_INPUTS", 89 | "value": "s3://aws-batch-genomics-shared/secondary-analysis/example-files/fastq/NIST7035_R*.fastq.gz" 90 | }, 91 | { 92 | "name": "JOB_OUTPUTS", 93 | "value": "*.html *.zip" 94 | }, 95 | { 96 | "name": "JOB_OUTPUT_PREFIX", 97 | "value": "s3://my-genomics-bucket-name/some-folder-name" 98 | } 99 | ] 100 | } 101 | } 102 | 103 | ``` 104 | In this example we are running the FastQC tool, which takes fastq files and 105 | generates a report. It will output zip and html files, which we will save to an 106 | S3 bucket. 107 | **jobName** - "fastqc". A name that describes the job to be run. 108 | **jobQueue** - "genomics-default-queue". A valid name of a job queue. This 109 | can be found in the AWS web console > Batch > Job queues. 110 | **jobDefinition** - "genomics-fastqc:1". A valid and active job definition and 111 | its version. This can be found in the AWS web console > Batch > Job 112 | definitions. 113 | **containerOverrides.vcpus** - 1. Request a machine that has at least 1 core. 114 | **containerOverrides.memory** - 1000. Request a machine that has at least 115 | 1000 MiB of RAM. 116 | **containerOverrides.command** - ["fastqc *.gz"]. Run the fastqc command on all 117 | the .gz files in the working directory. 118 | **containerOverrides.environment** - A list of key-value pairs. 119 | 120 | **name**: JOB_INPUTS. 121 | **value**: fastq files from a source S3 bucket. 122 | 123 | **name**: JOB_OUTPUTS. 124 | **value**: "*.html *.zip". Copy all html and zip files from the local working directory 125 | to an S3 bucket. 126 | 127 | **name**: JOB_OUTPUT_PREFIX. 128 | **value**: An S3 bucket and a prefix (folder) to copy the output files into. 129 | 130 | 131 | There are several examples under the `examples` directory. To run an example, 132 | edit the example file you want to run (e.g., `examples/batch-fastqc-job.json`), 133 | update the `JOB_INPUTS` to a valid source of your sample fastq files, or leave 134 | the default value to use a demo sample.
Update the `JOB_OUTPUT_PREFIX` to a 135 | valid S3 bucket and a subfolder where you want the output zip and html files 136 | to be saved. 137 | 138 | Change directory to the examples directory and then submit the job to Batch. 139 | 140 | ``` 141 | cd examples 142 | aws batch submit-job --cli-input-json file://batch-fastqc-job.json 143 | ``` 144 | 145 | Navigate to the Batch jobs page (AWS console -> AWS Batch -> Jobs -> select the 146 | job queue you used, e.g., `genomics-default-queue`) to track the progress of 147 | the job. You can click on the job name and then click on the Log stream name 148 | link to track the stdout of the running task. 149 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-bwa-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "bwa", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "bwa:1", 5 | "containerOverrides": { 6 | "command": ["bwa mem -t 8 -p -o ${SAMPLE_ID}.sam ${REFERENCE_NAME}.fasta ${SAMPLE_ID}_1*.fastq.gz"], 7 | "memory": 32000, 8 | "environment": [{ 9 | "name": "JOB_INPUTS", 10 | "value": "s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_*.fastq.gz s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta*" 11 | }, 12 | { 13 | "name": "SAMPLE_ID", 14 | "value": "SRR014820" 15 | }, 16 | { 17 | "name": "REFERENCE_NAME", 18 | "value": "Homo_sapiens_assembly38" 19 | }, 20 | { 21 | "name": "JOB_OUTPUTS", 22 | "value": "*.sam" 23 | }, 24 | { 25 | "name": "JOB_OUTPUT_PREFIX", 26 | "value": "s3://YOUR-BUCKET-NAME/output" 27 | } 28 | ] 29 | } 30 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-fastqc-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "fastqc", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "fastqc:1", 5 | "containerOverrides": { 6 | "command": ["fastqc *.gz"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_*.fastq.gz" 10 | }, 11 | { 12 | "name": "JOB_OUTPUTS", 13 | "value": "*.html *.zip" 14 | }, 15 | { 16 | "name": "JOB_OUTPUT_PREFIX", 17 | "value": "s3://YOUR-BUCKET-NAME/output" 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-gatk-dictionary.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "gatk-create-dictionary", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "gatkCreateSequenceDictionary:1", 5 | "containerOverrides": { 6 | "command": ["java -jar /usr/app/picard.jar CreateSequenceDictionary R=Homo_sapiens_assembly38.fasta O=Homo_sapiens_assembly38.dict"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta" 10 | }, 11 | { 12 | "name": "JOB_OUTPUTS", 13 | "value": "*.dict" 14 | }, 15 | { 16 | "name": "JOB_OUTPUT_PREFIX", 17 | "value": "s3://YOUR-BUCKET-NAME/ref" 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-gatk-htc.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "gatkHaploTypeCaller", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition":
"gatkHaplotypeCaller:1", 5 | "containerOverrides": { 6 | "command": ["gatk --java-options \"-Xmx4g\" HaplotypeCaller -R ${REFERENCE_NAME}.fasta -I ${SAMPLE_ID}.bam -O ${SAMPLE_ID}.vcf.gz -bamout ${SAMPLE_ID}.out.bam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/ref s3://YOUR-BUCKET-NAME/samples" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "REFERENCE_NAME", 17 | "value": "Homo_sapiens_assembly38" 18 | }, 19 | { 20 | "name": "JOB_OUTPUTS", 21 | "value": "*.out.bam *.vcf.gz" 22 | }, 23 | { 24 | "name": "JOB_OUTPUT_PREFIX", 25 | "value": "s3://YOUR-BUCKET-NAME/output" 26 | }, 27 | { 28 | "name": "JOB_INPUT_S3_COPY_METHOD", 29 | "value": "s3sync" 30 | } 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-minimap2-job.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "minimap2", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "minimap2:1", 5 | "containerOverrides": { 6 | "vcpus": 8, 7 | "memory": 32000, 8 | "command": ["minimap2 -ax map-pb Homo_sapiens_assembly38.fasta SRR014820_1.fastq.gz > SRR014820.sam"], 9 | "environment": [{ 10 | "name": "JOB_INPUTS", 11 | "value": "s3://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta s3://1000genomes/pilot_data/data/NA12878/pilot3_unrecal/SRR014820_1.fastq.gz" 12 | }, 13 | { 14 | "name": "JOB_OUTPUTS", 15 | "value": "*.sam" 16 | }, 17 | { 18 | "name": "JOB_OUTPUT_PREFIX", 19 | "value": "s3://YOUR-BUCKET-NAME/output" 20 | } 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-picard-add-missing-groups.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "picard-add-missing-groups", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "picardAddMissingGroups:1", 5 | "containerOverrides": { 6 | "command": ["java -jar /usr/picard/picard.jar AddOrReplaceReadGroups", 7 | " -I ${SAMPLE_ID}.bam -O ${SAMPLE_ID}.rg.bam -RGID 4 --RGLB lib1 ", 8 | " --RGPL ILLUMINA --RGPU unit1 --RGSM 20;", 9 | " mv ${SAMPLE_ID}.rg.bam ${SAMPLE_ID}.bam;" 10 | ], 11 | "environment": [{ 12 | "name": "JOB_INPUTS", 13 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.bam" 14 | }, 15 | { 16 | "name": "SAMPLE_ID", 17 | "value": "SRR014820" 18 | }, 19 | { 20 | "name": "JOB_OUTPUTS", 21 | "value": "*.bam" 22 | }, 23 | { 24 | "name": "JOB_OUTPUT_PREFIX", 25 | "value": "s3://YOUR-BUCKET-NAME/output" 26 | } 27 | ] 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-samtools-index.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "samtoolsIndex", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "samtoolsIndex:1", 5 | "containerOverrides": { 6 | "command": ["samtools index ${SAMPLE_ID}.bam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.bam" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "JOB_OUTPUTS", 17 | "value": "*.bam *.bai" 18 | }, 19 | { 20 | "name": "JOB_OUTPUT_PREFIX", 21 | "value": "s3://YOUR-BUCKET-NAME/output" 22 | } 23 | ] 24 | } 25 | } 
-------------------------------------------------------------------------------- /src/aws-genomics-cdk/examples/batch-samtools-sort.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "samtoolsSort", 3 | "jobQueue": "genomics-default-queue", 4 | "jobDefinition": "samtoolsSort:1", 5 | "containerOverrides": { 6 | "command": ["samtools sort -@ 4 -o ${SAMPLE_ID}.bam ${SAMPLE_ID}.sam"], 7 | "environment": [{ 8 | "name": "JOB_INPUTS", 9 | "value": "s3://YOUR-BUCKET-NAME/samples/SRR014820.sam" 10 | }, 11 | { 12 | "name": "SAMPLE_ID", 13 | "value": "SRR014820" 14 | }, 15 | { 16 | "name": "JOB_OUTPUTS", 17 | "value": "*.bam" 18 | }, 19 | { 20 | "name": "JOB_OUTPUT_PREFIX", 21 | "value": "s3://YOUR-BUCKET-NAME/output" 22 | } 23 | ] 24 | } 25 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ['/test'], 3 | testMatch: ['**/*.test.ts'], 4 | transform: { 5 | '^.+\\.tsx?$': 'ts-jest' 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/aws-genomics-cdk-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as s3 from "@aws-cdk/aws-s3"; 4 | import * as config from "../app.config.json"; 5 | import GenomicsVpcStack from "./vpc/vpc-stack"; 6 | import GenomicsBatchStack from "./batch/batch-stack"; 7 | 8 | //Workflows 9 | import { WorkflowConfig } from "./workflows/workflow-config"; 10 | import VariantCallingStateMachine from "./workflows/variant-calling-stack"; 11 | 12 | export class AwsGenomicsCdkStack extends cdk.Stack { 13 | constructor(scope: cdk.Construct, id: string, props: cdk.StackProps) { 14 | super(scope, id, props); 15 | 16 | // Create a new VPC or use an existing one 17 | let vpc: ec2.Vpc; 18 | if (config.VPC.createVPC) { 19 | vpc = new GenomicsVpcStack(this, config.VPC.VPCName, props).vpc; 20 | } else { 21 | vpc = ec2.Vpc.fromLookup(this, `${config.projectName}-vpc-lookup`, { 22 | vpcName: config.VPC.VPCName, 23 | }) as ec2.Vpc; 24 | } 25 | 26 | // Create a new bucket if set in the config 27 | if (!config.S3.existingBucket) { 28 | const bucketProps = { 29 | bucketName: config.S3.bucketName, 30 | encryption: s3.BucketEncryption.S3_MANAGED, 31 | removalPolicy: cdk.RemovalPolicy.RETAIN, 32 | }; 33 | 34 | new s3.Bucket(this, bucketProps.bucketName, bucketProps); 35 | } 36 | 37 | // Create an AWS Batch resources 38 | const batchProps = { 39 | stackProps: props, 40 | vpc: vpc, 41 | bucket: config.S3.bucketName, 42 | }; 43 | 44 | const batch = new GenomicsBatchStack( 45 | this, 46 | `${config.projectName}-batch`, 47 | batchProps 48 | ); 49 | 50 | // loop throgh the app.config workflows file and set infrastructure for 51 | // the provided workflows 52 | let workflow: WorkflowConfig; 53 | for (let i = 0; i < config.workflows.length; i++) { 54 | workflow = config.workflows[i] as WorkflowConfig; 55 | 56 | switch (workflow.name) { 57 | case "variantCalling": 58 | new VariantCallingStateMachine( 59 | this, 60 | `${config.projectName}-${workflow.name}`, 61 | { 62 | stackProps: props, 63 | batchQueue: 64 | workflow.spot === true 65 | ? 
batch.genomicsDefaultQueue 66 | : batch.genomicsHighPriorityQueue, 67 | taskRole: batch.taskRole, 68 | } 69 | ); 70 | break; 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-compute-environmnet-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ec2 from "@aws-cdk/aws-ec2"; 4 | import * as ecs from "@aws-cdk/aws-ecs"; 5 | import * as iam from "@aws-cdk/aws-iam"; 6 | import * as config from "../../app.config.json"; 7 | 8 | export class GenomicsComputeEnvironmentProps { 9 | readonly computeResourcesType?: batch.ComputeResourceType; 10 | readonly vpc: ec2.Vpc; 11 | readonly allocationStrategy?: batch.AllocationStrategy; 12 | readonly computeResourcesTags?: { [key: string]: string }; 13 | readonly instanceProfileArn: string; 14 | readonly fleetRole: iam.Role; 15 | readonly serviceRole: iam.Role; 16 | readonly instanceTypes: ec2.InstanceType[]; 17 | readonly launchTemplateName: string; 18 | readonly maxvCpus: number; 19 | readonly computeEnvironmentName: string; 20 | } 21 | 22 | export default class GenomicsComputeEnvironment extends cdk.Construct { 23 | public readonly computeEnvironment: batch.ComputeEnvironment; 24 | 25 | constructor( 26 | scope: cdk.Construct, 27 | id: string, 28 | props: GenomicsComputeEnvironmentProps 29 | ) { 30 | super(scope, id); 31 | 32 | const computeResources = { 33 | type: props.computeResourcesType ?? batch.ComputeResourceType.SPOT, 34 | vpc: props.vpc, 35 | allocationStrategy: 36 | props.allocationStrategy ?? 37 | batch.AllocationStrategy.SPOT_CAPACITY_OPTIMIZED, 38 | computeResourcesTags: props.computeResourcesTags ?? 
{ 39 | Name: `${config.projectName}-instance` 40 | }, 41 | image: ecs.EcsOptimizedImage.amazonLinux2(), 42 | instanceRole: props.instanceProfileArn, 43 | spotFleetRole: props.fleetRole, 44 | serviceRole: props.serviceRole, 45 | instanceTypes: props.instanceTypes, 46 | launchTemplate: { 47 | launchTemplateName: props.launchTemplateName, 48 | }, 49 | maxvCpus: props.maxvCpus, 50 | }; 51 | 52 | const computeEnvironmentProps = { 53 | computeEnvironmentName: props.computeEnvironmentName, 54 | enabled: true, 55 | managed: true, 56 | serviceRole: props.serviceRole, 57 | computeResources: computeResources, 58 | }; 59 | 60 | this.computeEnvironment = new batch.ComputeEnvironment( 61 | this, 62 | computeEnvironmentProps.computeEnvironmentName, 63 | computeEnvironmentProps 64 | ); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-iam-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as iam from "@aws-cdk/aws-iam"; 3 | import * as path from "path"; 4 | import * as fs from "fs"; 5 | import * as config from "../../app.config.json"; 6 | 7 | export interface GenomicsIamProps { 8 | readonly bucketName: string; 9 | readonly account: string; 10 | } 11 | 12 | export default class GenomicsIam extends cdk.Stack { 13 | public readonly serviceRole: iam.Role; 14 | public readonly taskRole: iam.Role; 15 | public readonly instanceProfileArn: string; 16 | public readonly fleetRole: iam.Role; 17 | 18 | constructor(scope: cdk.Construct, id: string, props: GenomicsIamProps) { 19 | super(scope, id); 20 | 21 | // Create a task role to be used by AWS batch container 22 | const taskRoleProps = { 23 | roleName: `${config.projectName}-ecs-task-role`, 24 | assumedBy: new iam.ServicePrincipal("ecs-tasks.amazonaws.com"), 25 | description: "allow ecs task to assume a role for the genomics pipleine", 26 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3ReadOnlyAccess")] 27 | }; 28 | 29 | this.taskRole = new iam.Role(this, taskRoleProps.roleName, taskRoleProps); 30 | 31 | 32 | // Create an instance role for the EC2 host machine for AWS Batch 33 | const instanceRoleProps = { 34 | roleName: `${config.projectName}-batch-instance-role`, 35 | assumedBy: new iam.ServicePrincipal("ec2.amazonaws.com"), 36 | description: "allow ec2 instance to assume a role for the genomics pipleine", 37 | managedPolicies: [ 38 | iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonEC2ContainerServiceforEC2Role"), 39 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3ReadOnlyAccess"), 40 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSSMManagedInstanceCore") 41 | ] 42 | }; 43 | 44 | const instanceRole = new iam.Role(this, instanceRoleProps.roleName, instanceRoleProps); 45 | 46 | 47 | // Create a spot fleet role to be used by AWS Batch when launching spot instances 48 | const fleetRoleProps = { 49 | roleName: `${config.projectName}-spot-fleet-role`, 50 | assumedBy: new iam.ServicePrincipal("ec2.amazonaws.com"), 51 | description: "allow ec2 instance to assume a role for the genomics pipleine", 52 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonEC2SpotFleetTaggingRole")] 53 | }; 54 | 55 | this.fleetRole = new iam.Role(this, fleetRoleProps.roleName, fleetRoleProps); 56 | 57 | 58 | // Create a service role for AWS Batch so it can assume other roles for the genomics pipeline 59 | const batchServiceRoleProps = 
{ 60 | roleName: `${config.projectName}-batch-service-role`, 61 | assumedBy: new iam.ServicePrincipal("batch.amazonaws.com"), 62 | description: "allow batch to assume a role for the genomics pipleine", 63 | managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AWSBatchServiceRole")] 64 | }; 65 | 66 | this.serviceRole = new iam.Role(this, batchServiceRoleProps.roleName, batchServiceRoleProps); 67 | 68 | 69 | // Create a policy to allow read and writes for an S3 bucket and add it to the task and instance roles 70 | const filePath = path.join(__dirname, "../../assets/genomics-policy-s3.json"); 71 | const bucketPolicy = fs.readFileSync(filePath, {encoding: "utf-8"}).replace(/BUCKET_NAME/g, props.bucketName); 72 | 73 | const policyProps = { 74 | policyName: `${config.projectName}-policy-s3`, 75 | document: iam.PolicyDocument.fromJson(JSON.parse(bucketPolicy)), 76 | force: true, 77 | roles: [this.taskRole, instanceRole] 78 | } 79 | const policy = new iam.Policy(this, policyProps.policyName, policyProps); 80 | 81 | 82 | // Create an instance profile to be used by AWS Batch compute environment 83 | const instanceProfileProps = { 84 | roles: [instanceRoleProps.roleName], 85 | instanceProfileName: `${config.projectName}-batch-instance-profile` 86 | }; 87 | const instanceProfile = new iam.CfnInstanceProfile(this, instanceProfileProps.instanceProfileName, instanceProfileProps); 88 | this.instanceProfileArn = `arn:aws:iam::${props.account}:instance-profile/${instanceProfileProps.instanceProfileName}`; 89 | } 90 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/batch-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ec2 from "@aws-cdk/aws-ec2"; 4 | import * as iam from "@aws-cdk/aws-iam"; 5 | import GenomicsComputeEnvironment from "./batch-compute-environmnet-construct"; 6 | import GenomicsLaunchTemplate from "./launch-template-construct"; 7 | import GenomicsJobQueue from "./job-queue-construct"; 8 | import GenomicsIam from "./batch-iam-stack"; 9 | import * as config from "../../app.config.json"; 10 | 11 | 12 | export interface GenomicsBatchStackProps { 13 | readonly stackProps: cdk.StackProps; 14 | readonly vpc: ec2.Vpc; 15 | readonly bucket: string; 16 | }; 17 | 18 | export default class GenomicsBatchStack extends cdk.Stack { 19 | 20 | public readonly genomicsDefaultQueue: batch.JobQueue; 21 | public readonly genomicsHighPriorityQueue: batch.JobQueue; 22 | public readonly taskRole: iam.Role; 23 | 24 | constructor(scope: cdk.Construct, id: string, props: GenomicsBatchStackProps) { 25 | super(scope, id, props.stackProps); 26 | 27 | const env = props.stackProps.env as cdk.Environment; 28 | 29 | // Create IAM roles and policies for AWS Batch 30 | const genomicsIamProps = { 31 | bucketName: props.bucket, 32 | account: env.account as string 33 | } 34 | 35 | const genomicsIam = new GenomicsIam(this, `${config.projectName}-iam`, genomicsIamProps); 36 | this.taskRole = genomicsIam.taskRole; 37 | 38 | 39 | 40 | // Create a EC2 Launch Template to be used by AWS Batch 41 | const launchTemplateProps = { 42 | launchTemplateName: `${config.projectName}-launch-template`, 43 | volumeSize: config.batch.defaultVolumeSize 44 | }; 45 | 46 | const launchTemplate = new GenomicsLaunchTemplate(this, launchTemplateProps.launchTemplateName, launchTemplateProps); 47 | 48 | 49 | // Create AWS Batch 
SPOT and On-Demand compute environments 50 | let envInstanceType = []; 51 | for (let i = 0; i < config.batch.instanceTypes.length; i++) { 52 | envInstanceType.push(new ec2.InstanceType(config.batch.instanceTypes[i])); 53 | } 54 | 55 | // Create spot compute environment for the genomics pipeline using SPOT instances 56 | const spotComputeEnvironmentProps = { 57 | computeEnvironmentName: `${config.projectName}-spot-compute-environment`, 58 | vpc: props.vpc, 59 | instanceTypes: envInstanceType, 60 | maxvCpus: config.batch.spotMaxVCPUs, 61 | instanceProfileArn: genomicsIam.instanceProfileArn, 62 | fleetRole: genomicsIam.fleetRole, 63 | serviceRole: genomicsIam.serviceRole, 64 | launchTemplateName: launchTemplate.template.launchTemplateName as string, 65 | }; 66 | 67 | const spotComputeEnvironment = new GenomicsComputeEnvironment(this, 68 | spotComputeEnvironmentProps.computeEnvironmentName, 69 | spotComputeEnvironmentProps 70 | ); 71 | 72 | // Create on demand compute environment using on demand instances 73 | const onDemandComputeEnvironmentProps = { 74 | computeEnvironmentName: `${config.projectName}-on-demand-compute-environment`, 75 | computeResourcesType: batch.ComputeResourceType.ON_DEMAND, 76 | allocationStrategy: batch.AllocationStrategy.BEST_FIT, 77 | vpc: props.vpc, 78 | instanceTypes: envInstanceType, 79 | maxvCpus: config.batch.onDemendMaxVCPUs, 80 | instanceProfileArn: genomicsIam.instanceProfileArn, 81 | fleetRole: genomicsIam.fleetRole, 82 | serviceRole: genomicsIam.serviceRole, 83 | launchTemplateName: launchTemplate.template.launchTemplateName as string, 84 | }; 85 | 86 | const onDemandComputeEnvironment = new GenomicsComputeEnvironment(this, 87 | onDemandComputeEnvironmentProps.computeEnvironmentName, 88 | onDemandComputeEnvironmentProps 89 | ); 90 | 91 | 92 | // Create default queue, using spot first and then on-demand instances 93 | const defaultQueueProps = { 94 | computeEnvironments: [ 95 | spotComputeEnvironment.computeEnvironment 96 | ], 97 | jobQueueName: `${config.projectName}-default-queue`, 98 | priority: 100 99 | }; 100 | 101 | const defaultQueue = new GenomicsJobQueue(this, defaultQueueProps.jobQueueName, defaultQueueProps); 102 | this.genomicsDefaultQueue = defaultQueue.jobQueue; 103 | 104 | 105 | // Create high priority queue, using on-demand instances and then spot 106 | const highPriorityQueueProps = { 107 | computeEnvironments: [ 108 | onDemandComputeEnvironment.computeEnvironment, 109 | spotComputeEnvironment.computeEnvironment 110 | 111 | ], 112 | jobQueueName: `${config.projectName}-high-priority-queue`, 113 | priority: 1000 114 | } 115 | 116 | const highPriorityQueue = new GenomicsJobQueue(this, highPriorityQueueProps.jobQueueName, highPriorityQueueProps); 117 | this.genomicsHighPriorityQueue = highPriorityQueue.jobQueue; 118 | 119 | 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/job-queue-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | 4 | export interface GenomicsJobQueueProps { 5 | readonly computeEnvironments: batch.ComputeEnvironment[]; 6 | readonly jobQueueName: string; 7 | readonly priority: number; 8 | } 9 | 10 | export default class GenomicsJobQueue extends cdk.Construct { 11 | public readonly jobQueue: batch.JobQueue; 12 | 13 | constructor(scope: cdk.Construct, id: string, props: GenomicsJobQueueProps) { 14 | 
super(scope, id); 15 | 16 | let environments = []; 17 | for (let i = 0; i < props.computeEnvironments.length; i++) { 18 | let environment = { 19 | computeEnvironment: props.computeEnvironments[i], 20 | order: i + 1, 21 | }; 22 | 23 | environments.push(environment); 24 | } 25 | 26 | let jobQueueProps = { 27 | jobQueueName: props.jobQueueName, 28 | priority: props.priority, 29 | computeEnvironments: environments, 30 | }; 31 | 32 | this.jobQueue = new batch.JobQueue( 33 | this, 34 | jobQueueProps.jobQueueName, 35 | jobQueueProps 36 | ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/batch/launch-template-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as path from "path"; 4 | import * as fs from "fs"; 5 | 6 | 7 | export interface GenomicsLaunchTemplateProps { 8 | readonly launchTemplateName: string; 9 | readonly volumeSize: number; 10 | readonly volumeType?: string; 11 | readonly encrypted?: boolean; 12 | readonly userData?: string; 13 | } 14 | 15 | export default class GenomicsLaunchTemplate extends cdk.Construct { 16 | public readonly template: ec2.CfnLaunchTemplate; 17 | 18 | constructor( 19 | scope: cdk.Construct, 20 | id: string, 21 | props: GenomicsLaunchTemplateProps 22 | ) { 23 | super(scope, id); 24 | 25 | let userData; 26 | 27 | if (props.userData !== undefined) { 28 | userData = props.userData; 29 | } else { 30 | const filePath = path.join( 31 | __dirname, 32 | "../../assets/launch_template_user_data.txt" 33 | ); 34 | userData = fs.readFileSync(filePath).toString("base64"); 35 | } 36 | 37 | const launchTemplateProps = { 38 | launchTemplateName: props.launchTemplateName, 39 | launchTemplateData: { 40 | blockDeviceMappings: [ 41 | { 42 | deviceName: "/dev/xvda", 43 | ebs: { 44 | encrypted: props.encrypted ?? true, 45 | volumeSize: props.volumeSize, 46 | volumeType: props.volumeType ?? 
"gp2", 47 | }, 48 | }, 49 | ], 50 | userData: userData, 51 | }, 52 | }; 53 | 54 | this.template = new ec2.CfnLaunchTemplate( 55 | this, 56 | props.launchTemplateName, 57 | launchTemplateProps 58 | ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/vpc/vpc-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as ec2 from "@aws-cdk/aws-ec2"; 3 | import * as config from "../../app.config.json"; 4 | 5 | export default class GenomicsVpcStack extends cdk.Stack { 6 | public readonly vpc: ec2.Vpc; 7 | 8 | constructor(scope: cdk.Construct, id: string, props: cdk.StackProps) { 9 | super(scope, id, props); 10 | 11 | const subnetConf = [ 12 | { 13 | cidrMask: config.VPC.cidrMask, 14 | name: "private", 15 | subnetType: ec2.SubnetType.PRIVATE, 16 | }, 17 | { 18 | cidrMask: config.VPC.cidrMask, 19 | name: "public", 20 | subnetType: ec2.SubnetType.PUBLIC, 21 | } 22 | ]; 23 | 24 | const vpcProp = { 25 | cidr: config.VPC.cidr, 26 | maxAZs: config.VPC.maxAZs, 27 | subnetConfiguration: subnetConf 28 | }; 29 | 30 | this.vpc = new ec2.Vpc(this, config.VPC.VPCName, vpcProp); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/genomics-task-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as sfn from "@aws-cdk/aws-stepfunctions"; 4 | import * as tasks from "@aws-cdk/aws-stepfunctions-tasks"; 5 | 6 | export interface GenomicsTaskProps { 7 | readonly taskName: string; 8 | readonly command: string[]; 9 | readonly jobDefinition: batch.JobDefinition; 10 | readonly queue: batch.JobQueue; 11 | readonly awsCliPath?: string; 12 | readonly environment?: { [key: string]: string }; 13 | } 14 | 15 | export default class GenomicsTask extends cdk.Construct { 16 | 17 | public readonly task: tasks.BatchSubmitJob; 18 | 19 | constructor(scope: cdk.Construct, id: string, props: GenomicsTaskProps) { 20 | super(scope, id); 21 | 22 | const defaultEnvironment = { 23 | JOB_WORKFLOW_NAME: sfn.JsonPath.stringAt("$$.StateMachine.Name"), 24 | JOB_WORKFLOW_EXECUTION: sfn.JsonPath.stringAt("$$.Execution.Name"), 25 | JOB_OUTPUT_PREFIX: sfn.JsonPath.stringAt("$.params.environment.JOB_OUTPUT_PREFIX"), 26 | JOB_AWS_CLI_PATH: props.awsCliPath ?? 
"/opt/aws-cli/bin" 27 | } 28 | 29 | let environment; 30 | if(props.environment){ 31 | environment = {...defaultEnvironment, ...props.environment}; 32 | } 33 | else{ 34 | environment = defaultEnvironment; 35 | } 36 | 37 | const taskContainerProps = { 38 | command: props.command, 39 | environment: environment 40 | }; 41 | const taskProps = { 42 | jobName: props.taskName, 43 | jobDefinitionArn: props.jobDefinition.jobDefinitionArn, 44 | jobQueueArn: props.queue.jobQueueArn, 45 | containerOverrides: taskContainerProps, 46 | inputPath: "$", 47 | resultPath: "$.result" 48 | }; 49 | 50 | this.task = new tasks.BatchSubmitJob(this, taskProps.jobName, taskProps); 51 | } 52 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/job-definition-construct.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as batch from "@aws-cdk/aws-batch"; 3 | import * as ecs from "@aws-cdk/aws-ecs"; 4 | 5 | import {GenomicsJobDefinitionProps} from "./job-definitions"; 6 | 7 | export default class GenomicsJobDefinition extends cdk.Construct{ 8 | 9 | public readonly jobDefinition: batch.JobDefinition; 10 | 11 | constructor(scope: cdk.Construct, id: string, props: GenomicsJobDefinitionProps) { 12 | super(scope, id); 13 | 14 | const repositoryUri = `${props.env.account}.dkr.ecr.${props.env.region}.amazonaws.com/${props.repository}`; 15 | const containerImage = ecs.ContainerImage.fromRegistry(repositoryUri); 16 | 17 | const mountPoints = [ 18 | { 19 | containerPath: "/opt/aws-cli", 20 | readOnly: false, 21 | sourceVolume: "awscli" 22 | }, 23 | { 24 | containerPath: "/data", 25 | readOnly: false, 26 | sourceVolume: "data" 27 | } 28 | ]; 29 | 30 | const volumes = [ 31 | { 32 | name: "awscli", 33 | host: { sourcePath: "/opt/aws-cli" } 34 | }, 35 | { 36 | name: "data", 37 | host: { sourcePath: "/data" } 38 | } 39 | ]; 40 | 41 | const jobDefinitionContainerProps = { 42 | image: containerImage, 43 | jobRole: props.jobRole, 44 | memoryLimitMiB: props.memoryLimit, 45 | mountPoints: mountPoints, 46 | volumes: volumes, 47 | vcpus: props.vcpus ?? 1 48 | }; 49 | 50 | const jobDefinitionProps = { 51 | container: jobDefinitionContainerProps, 52 | jobDefinitionName: id, 53 | retryAttempts: props.retryAttempts ?? 1, 54 | timeout: cdk.Duration.seconds(props.timeout ?? 
3600) 55 | }; 56 | 57 | this.jobDefinition = new batch.JobDefinition(this, id, jobDefinitionProps); 58 | } 59 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/job-definitions.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "@aws-cdk/core"; 2 | import * as iam from "@aws-cdk/aws-iam"; 3 | 4 | export enum GenomicsJobDefinitionTypes { 5 | FASTQC = "fastqc", 6 | MINIMAP2 = "minimap2", 7 | GATK = "gatk", 8 | BWA = "bwa", 9 | SAMTOOLS = "samtools", 10 | PICARD = "picard" 11 | } 12 | 13 | export interface GenomicsJobDefinitionProps { 14 | readonly repository: string; 15 | readonly jobDefinitionName?: string; 16 | readonly retryAttempts?: number; 17 | readonly timeout?: number; 18 | readonly env: cdk.ResourceEnvironment; 19 | readonly stack: cdk.Stack; 20 | readonly jobRole: iam.Role; 21 | readonly memoryLimit?: number; 22 | readonly vcpus?: number; 23 | } 24 | 25 | export class JobDefinitionBase implements GenomicsJobDefinitionProps { 26 | public repository: string; 27 | public jobDefinitionName: string; 28 | public retryAttempts?: number; 29 | public timeout?: number; 30 | public env: cdk.ResourceEnvironment; 31 | public stack: cdk.Stack; 32 | public jobRole: iam.Role; 33 | public memoryLimit?: number; 34 | public vcpus?: number; 35 | 36 | constructor() { 37 | this.retryAttempts = 1; 38 | this.timeout = 3600; 39 | this.memoryLimit = 16000; 40 | this.vcpus = 8; 41 | } 42 | } 43 | 44 | export class FastQcJobDefinition extends JobDefinitionBase { 45 | constructor(props: GenomicsJobDefinitionProps) { 46 | super(); 47 | this.repository = props.repository; 48 | this.jobDefinitionName = GenomicsJobDefinitionTypes.FASTQC; 49 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 50 | this.timeout = props.timeout ?? this.timeout; 51 | this.env = props.env; 52 | this.stack = props.stack; 53 | this.jobRole = props.jobRole; 54 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 55 | this.vcpus = props.vcpus ?? this.vcpus; 56 | } 57 | } 58 | 59 | export class Minimap2JObDefinition extends JobDefinitionBase { 60 | constructor(props: GenomicsJobDefinitionProps) { 61 | super(); 62 | this.repository = props.repository; 63 | this.jobDefinitionName = GenomicsJobDefinitionTypes.MINIMAP2; 64 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 65 | this.timeout = props.timeout ?? this.timeout; 66 | this.env = props.env; 67 | this.stack = props.stack; 68 | this.jobRole = props.jobRole; 69 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 70 | this.vcpus = props.vcpus ?? this.vcpus; 71 | } 72 | } 73 | 74 | export class GatkJObDefinition extends JobDefinitionBase { 75 | constructor(props: GenomicsJobDefinitionProps) { 76 | super(); 77 | this.repository = props.repository; 78 | this.jobDefinitionName = GenomicsJobDefinitionTypes.GATK; 79 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 80 | this.timeout = props.timeout ?? this.timeout; 81 | this.env = props.env; 82 | this.stack = props.stack; 83 | this.jobRole = props.jobRole; 84 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 85 | this.vcpus = props.vcpus ?? this.vcpus; 86 | } 87 | } 88 | 89 | export class BwaJObDefinition extends JobDefinitionBase { 90 | constructor(props: GenomicsJobDefinitionProps) { 91 | super(); 92 | this.repository = props.repository; 93 | this.jobDefinitionName = GenomicsJobDefinitionTypes.BWA; 94 | this.retryAttempts = props.retryAttempts ?? 
this.retryAttempts; 95 | this.timeout = props.timeout ?? this.timeout; 96 | this.env = props.env; 97 | this.stack = props.stack; 98 | this.jobRole = props.jobRole; 99 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 100 | this.vcpus = props.vcpus ?? this.vcpus; 101 | } 102 | } 103 | 104 | export class SamToolsJObDefinition extends JobDefinitionBase { 105 | constructor(props: GenomicsJobDefinitionProps) { 106 | super(); 107 | this.repository = props.repository; 108 | this.jobDefinitionName = GenomicsJobDefinitionTypes.SAMTOOLS; 109 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 110 | this.timeout = props.timeout ?? this.timeout; 111 | this.env = props.env; 112 | this.stack = props.stack; 113 | this.jobRole = props.jobRole; 114 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 115 | this.vcpus = props.vcpus ?? this.vcpus; 116 | } 117 | } 118 | 119 | export class PicardJObDefinition extends JobDefinitionBase { 120 | constructor(props: GenomicsJobDefinitionProps) { 121 | super(); 122 | this.repository = props.repository; 123 | this.jobDefinitionName = GenomicsJobDefinitionTypes.PICARD; 124 | this.retryAttempts = props.retryAttempts ?? this.retryAttempts; 125 | this.timeout = props.timeout ?? this.timeout; 126 | this.env = props.env; 127 | this.stack = props.stack; 128 | this.jobRole = props.jobRole; 129 | this.memoryLimit = props.memoryLimit ?? this.memoryLimit; 130 | this.vcpus = props.vcpus ?? this.vcpus; 131 | } 132 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/lib/workflows/workflow-config.ts: -------------------------------------------------------------------------------- 1 | export interface WorkflowConfig { 2 | readonly name: string; 3 | readonly spot: boolean; 4 | } -------------------------------------------------------------------------------- /src/aws-genomics-cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "aws-genomics-cdk", 3 | "version": "0.1.0", 4 | "bin": { 5 | "aws-genomics-cdk": "bin/aws-genomics-cdk.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@aws-cdk/assert": "1.128.0", 15 | "@types/node": "16.11.0", 16 | "aws-cdk": "1.128.0", 17 | "ts-node": "^10.3.0", 18 | "typescript": "~4.4.4" 19 | }, 20 | "dependencies": { 21 | "@aws-cdk/aws-batch": "^1.128.0", 22 | "@aws-cdk/aws-ec2": "^1.128.0", 23 | "@aws-cdk/aws-stepfunctions": "^1.128.0", 24 | "@aws-cdk/aws-stepfunctions-tasks": "^1.128.0", 25 | "@aws-cdk/core": "1.128.0", 26 | "source-map-support": "^0.5.20" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/test/aws-genomics-cdk.test.ts: -------------------------------------------------------------------------------- 1 | import { expect as expectCDK, matchTemplate, MatchStyle } from '@aws-cdk/assert'; 2 | import * as cdk from '@aws-cdk/core'; 3 | import * as AwsGenomicsCdk from '../lib/aws-genomics-cdk-stack'; 4 | 5 | test('Empty Stack', () => { 6 | const app = new cdk.App(); 7 | // WHEN 8 | const stack = new AwsGenomicsCdk.AwsGenomicsCdkStack(app, 'MyTestStack'); 9 | // THEN 10 | expectCDK(stack).to(matchTemplate({ 11 | "Resources": {} 12 | }, MatchStyle.EXACT)) 13 | }); 14 | -------------------------------------------------------------------------------- /src/aws-genomics-cdk/tsconfig.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": ["es2018"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false, 20 | "typeRoots": ["./node_modules/@types"], 21 | "resolveJsonModule": true 22 | }, 23 | "exclude": ["cdk.out"] 24 | } 25 | -------------------------------------------------------------------------------- /src/containers/.gitignore: -------------------------------------------------------------------------------- 1 | job-definition.json -------------------------------------------------------------------------------- /src/containers/_common/README.md: -------------------------------------------------------------------------------- 1 | # Common assets for tooling containers 2 | 3 | These are assets that are used to build all tooling containers. 4 | 5 | * `build.sh`: a generic build script that first builds a base image for a container, then builds an AWS specific image 6 | * `entrypoint.aws.sh`: a generic entrypoint script that wraps a call to a binary tool in the container with handlers data staging from/to S3 7 | -------------------------------------------------------------------------------- /src/containers/_common/aws.dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | FROM ${BASE_IMAGE}:latest 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y gettext-base 6 | RUN apt-get clean 7 | 8 | ENV PATH=/opt/bin:$PATH 9 | 10 | COPY _common/entrypoint.aws.sh /opt/bin/entrypoint.aws.sh 11 | RUN chmod +x /opt/bin/entrypoint.aws.sh 12 | 13 | WORKDIR /scratch 14 | 15 | ENTRYPOINT ["entrypoint.aws.sh"] 16 | -------------------------------------------------------------------------------- /src/containers/_common/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_NAME=$1 5 | IMAGE_TAG=$2 6 | 7 | echo "Docker Login to ECR" 8 | eval $(aws ecr get-login --no-include-email --region ${AWS_REGION}) 9 | 10 | # retrieve image layer cache from previously built build stage 11 | docker pull ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} || true 12 | 13 | # (re)build just the build stage of the image 14 | docker build \ 15 | --target build \ 16 | --cache-from ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} \ 17 | --build-arg VERSION=$IMAGE_TAG \ 18 | -t ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} . 19 | 20 | # build the base image 21 | docker build \ 22 | --cache-from ${REGISTRY}/${IMAGE_NAME}:build-${IMAGE_TAG} \ 23 | --build-arg VERSION=$IMAGE_TAG \ 24 | -t $IMAGE_NAME . 25 | 26 | # build the image with an AWS specific entrypoint 27 | docker build \ 28 | --build-arg BASE_IMAGE=$IMAGE_NAME \ 29 | -t $IMAGE_NAME:$IMAGE_TAG \ 30 | -t $IMAGE_NAME:latest \ 31 | -f _common/aws.dockerfile . 
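A hedged usage sketch for `build.sh` above, based only on the variables and arguments the script references (`REGISTRY`, `AWS_REGION`, and a positional image name and tag); the registry URI is a placeholder, and the working directory must be a Docker build context that contains both the tool's `Dockerfile` and the `_common/` assets, which may be prepared differently in the repository's own build pipeline:

```
# Placeholder values - substitute your own account, region, and ECR registry.
export AWS_REGION=us-east-1
export REGISTRY=123456789012.dkr.ecr.us-east-1.amazonaws.com

# Build the samtools tooling container, tagging it with the tool version.
bash _common/build.sh samtools 1.9
```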
-------------------------------------------------------------------------------- /src/containers/_common/push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_NAME=$1 5 | IMAGE_TAG=$2 6 | 7 | echo "Docker Login to ECR" 8 | eval $(aws ecr get-login --no-include-email --region ${AWS_REGION}) 9 | 10 | # # this script expects the image repository to be created by CFN stack prior to build 11 | # 12 | # # alternatively, you can create the image repository directly via the aws cli if it does not exist 13 | # aws ecr describe-repositories --repository-names ${IMAGE_NAME} \ 14 | # || aws ecr create-repository --repository-name ${IMAGE_NAME} 15 | # 16 | # # and add an appropriate lifecycle policy 17 | # lifecycle_policy=$(cat < $NF_CONFIG 45 | workDir = "$NF_WORKDIR" 46 | process.executor = "awsbatch" 47 | process.queue = "$NF_JOB_QUEUE" 48 | aws.batch.cliPath = "$AWS_CLI_PATH" 49 | EOF 50 | 51 | if [[ "$EFS_MOUNT" != "" ]] 52 | then 53 | echo aws.batch.volumes = [\"/mnt/efs\"] >> $NF_CONFIG 54 | fi 55 | 56 | echo "=== CONFIGURATION ===" 57 | cat ./nextflow.config 58 | 59 | # stage in session cache 60 | # .nextflow directory holds all session information for the current and past runs. 61 | # it should be `sync`'d with an s3 uri, so that runs from previous sessions can be 62 | # resumed 63 | echo "== Restoring Session Cache ==" 64 | aws s3 sync --no-progress $NF_LOGSDIR/.nextflow .nextflow 65 | 66 | function preserve_session() { 67 | # stage out session cache 68 | if [ -d .nextflow ]; then 69 | echo "== Preserving Session Cache ==" 70 | aws s3 sync --no-progress .nextflow $NF_LOGSDIR/.nextflow 71 | fi 72 | 73 | # .nextflow.log file has more detailed logging from the workflow run and is 74 | # nominally unique per run. 75 | # 76 | # when run locally, .nextflow.logs are automatically rotated 77 | # when syncing to S3 uniquely identify logs by the batch GUID 78 | if [ -f .nextflow.log ]; then 79 | echo "== Preserving Session Log ==" 80 | aws s3 cp --no-progress .nextflow.log $NF_LOGSDIR/.nextflow.log.${GUID/\//.} 81 | fi 82 | } 83 | 84 | function show_log() { 85 | echo "=== Nextflow Log ===" 86 | cat ./.nextflow.log 87 | } 88 | 89 | function cleanup() { 90 | set +e 91 | wait $NEXTFLOW_PID 92 | set -e 93 | echo "=== Running Cleanup ===" 94 | 95 | show_log 96 | preserve_session 97 | 98 | echo "=== Bye! ===" 99 | } 100 | 101 | function cancel() { 102 | # AWS Batch sends a SIGTERM to a container if its job is cancelled/terminated 103 | # forward this signal to Nextflow so that it can cancel any pending workflow jobs 104 | 105 | set +e # ignore errors here 106 | echo "=== !! CANCELLING WORKFLOW !! ===" 107 | echo "stopping nextflow pid: $NEXTFLOW_PID" 108 | kill -TERM "$NEXTFLOW_PID" 109 | echo "waiting .." 110 | wait $NEXTFLOW_PID 111 | echo "=== !! cancellation complete !! ===" 112 | set -e 113 | } 114 | 115 | trap "cancel; cleanup" TERM 116 | trap "cleanup" EXIT 117 | 118 | # stage workflow definition 119 | if [[ "$NEXTFLOW_PROJECT" =~ ^s3://.* ]]; then 120 | echo "== Staging S3 Project ==" 121 | aws s3 sync --no-progress --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT ./project 122 | NEXTFLOW_PROJECT=./project 123 | fi 124 | 125 | echo "== Running Workflow ==" 126 | echo "nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS" 127 | export NXF_ANSI_LOG=false 128 | nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS & 129 | 130 | NEXTFLOW_PID=$! 131 | echo "nextflow pid: $NEXTFLOW_PID" 132 | jobs 133 | echo "waiting .." 
134 | wait $NEXTFLOW_PID 135 | -------------------------------------------------------------------------------- /src/containers/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lts/ubuntu:18.04 AS build 2 | 3 | ARG VERSION=1.9 4 | 5 | # Metadata 6 | LABEL container.base.image="ubuntu:18.04" 7 | LABEL software.name="SAMtools" 8 | LABEL software.version=${VERSION} 9 | LABEL software.description="Utilities for the Sequence Alignment/Map (SAM/BAM/CRAM) formats" 10 | LABEL software.website="http://www.htslib.org" 11 | LABEL software.documentation="http://www.htslib.org/doc/samtools.html" 12 | LABEL software.license="MIT/Expat" 13 | LABEL tags="Genomics" 14 | 15 | # System and library dependencies 16 | RUN apt-get -y update && \ 17 | apt-get -y install \ 18 | autoconf \ 19 | automake \ 20 | make \ 21 | gcc \ 22 | perl \ 23 | zlib1g-dev \ 24 | libbz2-dev \ 25 | liblzma-dev \ 26 | libcurl4-gnutls-dev \ 27 | libssl-dev \ 28 | libncurses5-dev \ 29 | wget && \ 30 | apt-get clean 31 | 32 | # Application installation 33 | RUN wget -O /samtools-${VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${VERSION}/samtools-${VERSION}.tar.bz2 && \ 35 | tar xvjf /samtools-${VERSION}.tar.bz2 && rm /samtools-${VERSION}.tar.bz2 36 | 37 | WORKDIR /samtools-${VERSION} 38 | RUN ./configure && make 39 | 40 | FROM public.ecr.aws/lts/ubuntu:18.04 AS final 41 | COPY --from=build /samtools-*/samtools /usr/local/bin 42 | 43 | RUN apt-get -y update && \ 44 | apt-get -y install \ 45 | libcurl3-gnutls && \ 46 | apt-get clean 47 | 48 | ENTRYPOINT ["samtools"] 49 | -------------------------------------------------------------------------------- /src/ebs-autoscale/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Elastic Block Store Autoscale 2 | 3 | ## RELOCATION NOTICE 4 | 5 | The code for this daemon has been moved to the following repoository: 6 | [awslabs/amazon-ebs-autoscale](https://github.com/awslabs/amazon-ebs-autoscale) 7 | -------------------------------------------------------------------------------- /src/ebs-autoscale/bin/init-ebs-autoscale.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # this is a shim for backwards compatibility for releases <2.6.0 6 | # old steps: 7 | # - cd /opt && wget $artifactRootUrl/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz 8 | # - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh $scratchPath /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log 9 | sh /opt/ebs-autoscale/install.sh $@ -------------------------------------------------------------------------------- /src/ecs-additions/awscli-shim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This shim is for using the AWS ClI v2 with containers that do not have full glibc 4 | # it makes the shared libraries the AWS CLI v2 findable via LD_LIBRARY_PATH 5 | # 6 | # expect to be installed as /opt/aws-cli/bin/aws 7 | # expect to actually call /opt/aws-cli/dist/aws 8 | # expect that /opt/aws-cli is mapped to containers 9 | 10 | BIN_DIR=`dirname $0` 11 | DIST_DIR=`dirname $BIN_DIR`/dist 12 | AWS=$DIST_DIR/aws 13 | 14 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIST_DIR 15 | 16 | $AWS "$@" 17 | 18 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-common.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ecs config options 4 | # graceful shutdown of jobs on spot instances if spot is terminated 5 | echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config 6 | # cache already pulled container images and reduce network traffic 7 | echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config 8 | # increase docker stop timeout so that containers can perform cleanup actions 9 | echo ECS_CONTAINER_STOP_TIMEOUT=60 >> /etc/ecs/ecs.config 10 | # This variable specifies how frequently the automated image cleanup process should check for images to delete. The default is every 30 minutes but you can reduce this period to as low as 10 minutes to remove images more frequently. 11 | echo ECS_IMAGE_CLEANUP_INTERVAL=5m >> /etc/ecs/ecs.config 12 | # This variable specifies the minimum amount of time between when an image was pulled and when it may become a candidate for removal. This is used to prevent cleaning up images that have just been pulled. The default is 1 hour. 13 | echo ECS_IMAGE_MINIMUM_CLEANUP_AGE=60m >> /etc/ecs/ecs.config 14 | 15 | # add fetch and run batch helper script 16 | chmod a+x /opt/ecs-additions/fetch_and_run.sh 17 | cp /opt/ecs-additions/fetch_and_run.sh /usr/local/bin 18 | 19 | # add awscli-shim 20 | mv /opt/aws-cli/bin /opt/aws-cli/dist 21 | chmod a+x /opt/ecs-additions/awscli-shim.sh 22 | mkdir /opt/aws-cli/bin 23 | cp /opt/ecs-additions/awscli-shim.sh /opt/aws-cli/bin/aws # Used in Nextflow 24 | 25 | # Remove current symlink 26 | rm -f /usr/local/aws-cli/v2/current/bin/aws 27 | cp /opt/ecs-additions/awscli-shim.sh /usr/local/aws-cli/v2/current/bin/aws # Used in Cromwell 28 | 29 | # ensure that /usr/bin/aws points to the non-shimmed version 30 | ln -sf /usr/local/aws-cli/v2/current/dist/aws /usr/bin/aws 31 | 32 | # add 4GB of swap space 33 | dd if=/dev/zero of=/swapfile bs=128M count=32 34 | chmod 600 /swapfile 35 | mkswap /swapfile 36 | swapon /swapfile 37 | swapon -s 38 | echo '/swapfile swap swap defaults 0 0' >> /etc/fstab 39 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-cromwell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SECRET_EXIST=$( 4 | aws secretsmanager list-secrets \ 5 | --filters "Key=name,Values=cromwell/credentials/dockerhub" | jq '.SecretList | length > 0') 6 | 7 | if [[ "$SECRET_EXIST" = true ]]; then 8 | SECRET_STRING=$(aws secretsmanager get-secret-value --secret-id cromwell/credentials/dockerhub --query SecretString --output text) 9 | echo 'ECS_ENGINE_AUTH_TYPE=docker' >>/etc/ecs/ecs.config 10 | echo 'ECS_ENGINE_AUTH_DATA={"https://index.docker.io/v1/":'${SECRET_STRING}'}' >>/etc/ecs/ecs.config 11 | fi 12 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-nextflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Amazon.com, Inc. or its affiliates. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 21 | # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 | # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 23 | # THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | # POSSIBILITY OF SUCH DAMAGE. 31 | 32 | # yum install -y bzip2 wget 33 | # USER=/home/ec2-user 34 | 35 | # wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 36 | # bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $USER/miniconda 37 | # $USER/miniconda/bin/conda install -c conda-forge -y awscli 38 | 39 | # chown -R ec2-user:ec2-user $USER/miniconda 40 | 41 | # rm Miniconda3-latest-Linux-x86_64.sh 42 | -------------------------------------------------------------------------------- /src/ecs-additions/ecs-additions-step-functions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Amazon.com, Inc. or its affiliates. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 21 | # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 | # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL 23 | # THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | # POSSIBILITY OF SUCH DAMAGE. 31 | 32 | # yum install -y bzip2 wget 33 | # PREFIX=/opt 34 | 35 | # wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 36 | # bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $PREFIX/miniconda 37 | # $PREFIX/miniconda/bin/conda install -c conda-forge -y awscli 38 | 39 | # chown -R ec2-user:ec2-user $PREFIX/miniconda 40 | 41 | # rm Miniconda3-latest-Linux-x86_64.sh 42 | -------------------------------------------------------------------------------- /src/ecs-additions/provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | OS="$(uname -r)" 7 | BASEDIR="$(dirname "${0}")" 8 | 9 | export OS 10 | 11 | # Expected environment variables 12 | GWFCORE_NAMESPACE=$1 13 | ARTIFACT_S3_ROOT_URL=$2 14 | # WORKFLOW_ORCHESTRATOR (OPTIONAL) 15 | 16 | printenv 17 | 18 | # start ssm-agent 19 | if [[ $OS =~ "amzn1" ]]; then 20 | start amazon-ssm-agent 21 | elif [[ $OS =~ "amzn2" ]]; then 22 | echo "Stopping and upgrading amazon ssm agent" 1>&2 23 | systemctl stop amazon-ssm-agent 24 | systemctl disable amazon-ssm-agent 25 | echo "Downloading latest version" 1>&2 26 | curl \ 27 | --output "amazon-ssm-agent.rpm" \ 28 | "https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm" 29 | echo "Upgrading ssm agent to latest version" 1>&2 30 | rpm \ 31 | --quiet \ 32 | --install \ 33 | --force \ 34 | --upgrade \ 35 | --replacepkgs \ 36 | "amazon-ssm-agent.rpm" 37 | echo "Re-enabling amazon ssm agent" 1>&2 38 | systemctl enable --output=verbose amazon-ssm-agent 39 | systemctl start --output=verbose amazon-ssm-agent 40 | echo "Cleaning up" 1>&2 41 | rm "amazon-ssm-agent.rpm" 42 | else 43 | echo "unsupported os: ${OS}" 44 | exit 100 45 | fi 46 | 47 | function ecs() { 48 | 49 | if [[ $OS =~ "amzn1" ]]; then 50 | # Amazon Linux 1 uses upstart for init 51 | case $1 in 52 | disable) 53 | stop ecs 54 | service docker stop 55 | ;; 56 | enable) 57 | service docker start 58 | start ecs 59 | ;; 60 | esac 61 | elif [[ $OS =~ "amzn2" ]]; then 62 | # Amazon Linux 2 uses systemd for init 63 | case $1 in 64 | disable) 65 | systemctl stop ecs 66 | systemctl stop docker 67 | ;; 68 | enable) 69 | systemctl start docker 70 | systemctl enable --now --no-block ecs # see: https://github.com/aws/amazon-ecs-agent/issues/1707 71 | ;; 72 | esac 73 | else 74 | echo "unsupported os: ${OS}" 75 | exit 100 76 | fi 77 | } 78 | 79 | # make sure that docker and ecs are running on script exit to avoid 80 | # zombie instances 81 | trap "ecs enable" INT ERR EXIT 82 | 83 | set +e 84 | ecs disable 85 | set -e 86 | 87 | ARTIFACT_S3_ROOT_URL=$(\ 88 | aws ssm get-parameter \ 89 | --name "/gwfcore/${GWFCORE_NAMESPACE}/installed-artifacts/s3-root-url" \ 90 | --query 'Parameter.Value' \ 91 | --output text \ 92 | ) 93 | 94 | ORCHESTRATOR_EXIST=$(\ 95 | aws ssm describe-parameters \ 96 | --filters "Key=Name,Values=/gwfcore/${GWFCORE_NAMESPACE}/orchestrator" | 
\ 97 | jq '.Parameters | length > 0' \ 98 | ) 99 | 100 | if [[ "$ORCHESTRATOR_EXIST" == "true" ]]; then 101 | WORKFLOW_ORCHESTRATOR=$(\ 102 | aws ssm get-parameter \ 103 | --name "/gwfcore/${GWFCORE_NAMESPACE}/orchestrator" \ 104 | --query 'Parameter.Value' \ 105 | --output text \ 106 | ) 107 | fi 108 | 109 | # retrieve and install amazon-ebs-autoscale 110 | cd /opt 111 | bash "${BASEDIR}/get-amazon-ebs-autoscale.sh" \ 112 | --install-version dist_release \ 113 | --artifact-root-url "${ARTIFACT_S3_ROOT_URL}" \ 114 | --file-system btrfs 115 | 116 | # common provisioning for all workflow orchestrators 117 | cd /opt 118 | bash "${BASEDIR}/ecs-additions-common.sh" 119 | 120 | # workflow specific provisioning if needed 121 | if [[ -n "$WORKFLOW_ORCHESTRATOR" ]]; then 122 | if [[ -f "$BASEDIR/ecs-additions-$WORKFLOW_ORCHESTRATOR.sh" ]]; then 123 | bash "$BASEDIR/ecs-additions-$WORKFLOW_ORCHESTRATOR.sh" 124 | fi 125 | fi 126 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflow CodeBuild 2 | 3 | This AWS CDK stack establishes an AWS CodePipeline that automatically keeps your account "GWF Core" infrastructure up to date with the 4 | latest release of the [aws-samples/aws-genomics-workflows](https://github.com/aws-samples/aws-genomics-workflows) templates 5 | and artifacts. 6 | 7 | The pipeline is triggered by a GitHub webhook that is triggered by "Push" events on the "release" branch of the 8 | aws-genomics-workflows repository. When triggered, it will clone the source code and build the templates and artifacts. 9 | It will then delete any existing "GWF core" Cloudformation deployed stacks and replace them with a new stack. By using 10 | a "delete and replace" strategy rather than an update we avoid issues where AWS Batch Compute Environments don't 11 | associate themselves with new versions of EC2 Launch Templates during an update. 12 | 13 | The pipeline doesn't create any workflow engine stacks, such as Cromwell or Nextflow, on top of the core, although 14 | it would be relatively easy to extend it for this purpose if required. 15 | 16 | ## PreRequisites 17 | 18 | ### GitHub OAuth token 19 | 20 | To set up the GitHub hook and allow cloning of the aws-genomics-workflow repository you will need a GitHub OAuth 21 | token with `Repo` and `admin:repo_hook` permissions. These should be stored in AWS Secrets Manager with the "secret name" 22 | `github-token`. 
23 | 24 | * To create the token, follow [these instructions](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) 25 | * To store the token using the AWS CLI: `aws secretsmanager create-secret --name github-token 26 | --description "GitHub OAuth Token" --secret-string "insert your GitHub OAuth token"` 27 | 28 | ### CDK 29 | 30 | To deploy this stack into your account, you need to install AWS CDK >= version 1.127.0, which itself requires Node.js 10.13.0 or later. 31 | 32 | To install CDK, type: 33 | 34 | ```shell 35 | npm install -g aws-cdk 36 | ``` 37 | 38 | If you have not already done so, your account and region need to be "bootstrapped" by CDK: 39 | 40 | ```shell 41 | cdk bootstrap aws://ACCOUNT-NUMBER/REGION 42 | ``` 43 | 44 | Full details can be found in the CDK [getting started guide](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html). 45 | 46 | ### AWS Account and Region 47 | 48 | CDK will deploy the code pipeline infrastructure into the account and region determined by your current AWS profile. 49 | 50 | ## Deployment 51 | 52 | To deploy the infrastructure into your account, simply type: 53 | 54 | ```shell 55 | cdk deploy 56 | ``` 57 | 58 | If you want to inspect the CloudFormation template that will be used for the deployment, you can print it to STDOUT with: 59 | 60 | ```shell 61 | cdk synth 62 | ``` 63 | 64 | ## Useful commands 65 | 66 | * `npm run build` compile TypeScript to JS 67 | * `npm run watch` watch for changes and compile 68 | * `npm run test` perform the jest unit tests 69 | * `cdk deploy` deploy this stack to your default AWS account/region 70 | * `cdk diff` compare deployed stack with current state 71 | * `cdk synth` emits the synthesized CloudFormation template 72 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/bin/aws-genomics-workflow-code-build.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import 'source-map-support/register'; 3 | import * as cdk from '@aws-cdk/core'; 4 | import { AwsGenomicsWorkflowCodeBuildStack } from '../lib/aws-genomics-workflow-code-build-stack'; 5 | 6 | const app = new cdk.App(); 7 | new AwsGenomicsWorkflowCodeBuildStack(app, 'AwsGenomicsWorkflowCodeBuildStack', { 8 | /* If you don't specify 'env', this stack will be environment-agnostic. 9 | * Account/Region-dependent features and context lookups will not work, 10 | * but a single synthesized template can be deployed anywhere. */ 11 | 12 | /* The next line specializes this stack for the AWS Account 13 | * and Region that are implied by the current CLI configuration. */ 14 | env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, 15 | 16 | /* Uncomment the next line if you know exactly what Account and Region you 17 | * want to deploy the stack to.
*/ 18 | // env: { account: '123456789012', region: 'us-east-1' }, 19 | 20 | /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */ 21 | }); 22 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/aws-genomics-workflow-code-build.ts", 3 | "context": { 4 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 5 | "@aws-cdk/core:enableStackNameDuplicates": "true", 6 | "aws-cdk:enableDiffNoFail": "true", 7 | "@aws-cdk/core:stackRelativeExports": "true", 8 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, 9 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, 10 | "@aws-cdk/aws-kms:defaultKeyPolicies": true, 11 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, 12 | "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true, 13 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 14 | "@aws-cdk/aws-efs:defaultEncryptionAtRest": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ['/test'], 3 | testMatch: ['**/*.test.ts'], 4 | transform: { 5 | '^.+\\.tsx?$': 'ts-jest' 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/lib/aws-genomics-workflow-code-build-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from '@aws-cdk/core'; 2 | import * as codebuild from '@aws-cdk/aws-codebuild'; 3 | import * as s3 from '@aws-cdk/aws-s3'; 4 | import * as iam from '@aws-cdk/aws-iam'; 5 | import * as codepipeline from '@aws-cdk/aws-codepipeline'; 6 | import * as actions from '@aws-cdk/aws-codepipeline-actions'; 7 | import * as ec2 from '@aws-cdk/aws-ec2'; 8 | import * as regionInfo from '@aws-cdk/region-info'; 9 | 10 | 11 | export class AwsGenomicsWorkflowCodeBuildStack extends cdk.Stack { 12 | constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) { 13 | super(scope, id, props); 14 | 15 | const info = regionInfo.RegionInfo.get(this.region); 16 | const s3Endpoint = info.servicePrincipal("s3.amazonaws.com"); 17 | 18 | const vpc = new ec2.Vpc(this, "CromwellVPC", { 19 | maxAzs: 3, 20 | gatewayEndpoints: { 21 | S3: { 22 | service: ec2.GatewayVpcEndpointAwsService.S3, 23 | }, 24 | } 25 | }); 26 | 27 | // S3 bucket for storing templates and artifacts 28 | const artifactBucket = new s3.Bucket(this,"GWFArtifactsBucket", { 29 | encryption: s3.BucketEncryption.S3_MANAGED, 30 | }); 31 | 32 | // S3 bucket that Cromwell will use 33 | const gwfBucket = new s3.Bucket(this, "GWFCoreBucket", { 34 | encryption: s3.BucketEncryption.S3_MANAGED, 35 | }) 36 | 37 | // objects needed for the "Source" stage of the pipeline 38 | const gitHubToken: cdk.SecretValue = cdk.SecretValue.secretsManager("github-token") 39 | const sourceOutput = new codepipeline.Artifact(); 40 | const sourceAction = new actions.GitHubSourceAction({ 41 | actionName: "GitHub_Source", 42 | owner: 'aws-samples', 43 | repo: "aws-genomics-workflows", 44 | branch: "release", 45 | oauthToken: gitHubToken, 46 | output: sourceOutput, 47 | trigger: actions.GitHubTrigger.WEBHOOK 48 | }) 49 | 50 | // objects needed for the "Build" stage of the pipeline 51 | const 
buildOutput = new codepipeline.Artifact(); 52 | const project = new codebuild.Project(this, "GenomicsWorkflowBuildProject", { 53 | description: "Builds the templates and artifacts for aws-genomics-workflows", 54 | artifacts: codebuild.Artifacts.s3({ 55 | bucket: artifactBucket, 56 | packageZip: false, 57 | }), 58 | buildSpec: codebuild.BuildSpec.fromObject({ 59 | version: 0.2, 60 | phases: { 61 | build: { 62 | commands: [ 63 | "ls -alF", 64 | "bash _scripts/make-dist.sh --verbose", 65 | "ls -alF dist/", 66 | `aws s3 sync dist/ s3://${artifactBucket.bucketName}` 67 | ], 68 | }, 69 | }, 70 | artifacts: { 71 | "base-directory": "dist", 72 | files: "**/*", 73 | } 74 | }), 75 | environment: {buildImage: codebuild.LinuxBuildImage.AMAZON_LINUX_2_3}, 76 | concurrentBuildLimit: 1, 77 | timeout: cdk.Duration.minutes(15), 78 | }); 79 | project.addToRolePolicy(new iam.PolicyStatement({ 80 | effect: iam.Effect.ALLOW, 81 | actions: ["s3:Get*", "s3:Put*", "s3:List*"], 82 | resources: [`${artifactBucket.bucketArn}`, `${artifactBucket.bucketArn}/*`] 83 | })); 84 | const buildAction = new actions.CodeBuildAction({ 85 | actionName: "Build_Artifacts_And_Templates", 86 | project: project, 87 | input: sourceOutput, 88 | outputs: [ buildOutput ] 89 | }); 90 | 91 | //objects needed for the "Deploy" stage of the pipeline 92 | const deleteGWFCoreStackAction = new actions.CloudFormationDeleteStackAction({ 93 | actionName: "Delete_GWF_Core_Stack", 94 | stackName: "GWFCoreStack", 95 | adminPermissions: true, 96 | runOrder: 10, 97 | }); 98 | const createGWFCoreAction = new actions.CloudFormationCreateUpdateStackAction({ 99 | actionName: "Create_GWF_Core", 100 | stackName: "GWFCoreStack", 101 | adminPermissions: true, 102 | templatePath: buildOutput.atPath("templates/gwfcore/gwfcore-root.template.yaml"), 103 | parameterOverrides: { 104 | VpcId: vpc.vpcId, 105 | SubnetIds: vpc.privateSubnets.map(value => value.subnetId).join(","), 106 | ArtifactBucketName: artifactBucket.bucketName, 107 | TemplateRootUrl: `https://${artifactBucket.bucketName}.${s3Endpoint}/templates`, 108 | S3BucketName: gwfBucket.bucketName, 109 | ExistingBucket: "Yes", 110 | }, 111 | runOrder: 20, 112 | }); 113 | 114 | 115 | // the pipeline 116 | new codepipeline.Pipeline(this, 'AmazonGenomicsWorkflowPipeline', { 117 | pipelineName: 'AmazonGenomicsWorkflowPipeline', 118 | stages: [ 119 | { 120 | stageName: 'Source', 121 | actions: [ 122 | sourceAction, 123 | ], 124 | }, 125 | { 126 | stageName: 'Build', 127 | actions: [ 128 | buildAction 129 | ], 130 | }, 131 | { 132 | stageName: 'Deploy', 133 | actions: [ 134 | deleteGWFCoreStackAction, 135 | createGWFCoreAction, 136 | ], 137 | }, 138 | ], 139 | }); 140 | 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gwf-core-codepipeline", 3 | "version": "1.0.0", 4 | "dependencies": { 5 | "@aws-cdk/aws-codebuild": "^1.127.0", 6 | "@aws-cdk/aws-codepipeline": "^1.127.0", 7 | "@aws-cdk/aws-codepipeline-actions": "^1.127.0", 8 | "@aws-cdk/core": "^1.127.0", 9 | "source-map-support": "0.5.16" 10 | }, 11 | "bin": { 12 | "cdk-test": "bin/cdk-test.js" 13 | }, 14 | "scripts": { 15 | "build": "tsc", 16 | "watch": "tsc -w", 17 | "test": "jest --passWithNoTests", 18 | "cdk": "cdk" 19 | }, 20 | "devDependencies": { 21 | "@aws-cdk/assert": "^1.127.0", 22 | "@types/jest": "^26.0.10", 23 | "@types/node": "10.17.27", 24 | "jest": 
"^27.2.5", 25 | "ts-jest": "^26.2.0", 26 | "aws-cdk": "^1.127.0", 27 | "ts-node": "^9.0.0", 28 | "typescript": "~3.9.7" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/gwf-core-codepipeline/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": ["es2018"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false, 20 | "typeRoots": ["./node_modules/@types"] 21 | }, 22 | "exclude": ["cdk.out"] 23 | } 24 | -------------------------------------------------------------------------------- /src/lambda/codebuild/lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | from __future__ import print_function 15 | from crhelper import CfnResource 16 | import logging 17 | import boto3 18 | import time 19 | 20 | logger = logging.getLogger(__name__) 21 | # Initialise the helper, all inputs are optional, this example shows the defaults 22 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 23 | 24 | try: 25 | codebuild = boto3.client('codebuild') 26 | # pass 27 | except Exception as e: 28 | helper.init_failure(e) 29 | 30 | 31 | @helper.create 32 | def create(event, context): 33 | logger.info("Got Create") 34 | start_build_job(event, context) 35 | 36 | 37 | @helper.update 38 | def update(event, context): 39 | logger.info("Got Update") 40 | start_build_job(event, context) 41 | 42 | 43 | @helper.delete 44 | def delete(event, context): 45 | logger.info("Got Delete") 46 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 
47 | 48 | 49 | @helper.poll_create 50 | def poll_create(event, context): 51 | logger.info("Got Create poll") 52 | return check_build_job_status(event, context) 53 | 54 | 55 | @helper.poll_update 56 | def poll_update(event, context): 57 | logger.info("Got Update poll") 58 | return check_build_job_status(event, context) 59 | 60 | 61 | def handler(event, context): 62 | helper(event, context) 63 | 64 | 65 | def start_build_job(event, context, action='setup'): 66 | response = codebuild.start_build( 67 | projectName=event['ResourceProperties']['BuildProject'] 68 | ) 69 | logger.info(response) 70 | helper.Data.update({"JobID": response['build']['id']}) 71 | 72 | 73 | def check_build_job_status(event, context): 74 | code_build_project_name = event['ResourceProperties']['BuildProject'] 75 | 76 | if not helper.Data.get("JobID"): 77 | raise ValueError("Job ID missing in the polling event.") 78 | 79 | job_id = helper.Data.get("JobID") 80 | 81 | # 'SUCCEEDED' | 'FAILED' | 'FAULT' | 'TIMED_OUT' | 'IN_PROGRESS' | 'STOPPED' 82 | response = codebuild.batch_get_builds(ids=[job_id]) 83 | build_status = response['builds'][0]['buildStatus'] 84 | 85 | if build_status == 'IN_PROGRESS': 86 | logger.info(build_status) 87 | return None 88 | else: 89 | if build_status == 'SUCCEEDED': 90 | logger.info(build_status) 91 | return True 92 | else: 93 | msg = "Code Build job '{0}' in project '{1}' exited with a build status of '{2}'." \ 94 | .format(job_id, code_build_project_name, build_status) 95 | logger.info(msg) 96 | raise ValueError(msg) 97 | -------------------------------------------------------------------------------- /src/lambda/codebuild/requirements.txt: -------------------------------------------------------------------------------- 1 | crhelper 2 | -------------------------------------------------------------------------------- /src/lambda/ecr/lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. 
* 12 | # *********************************************************************************************************************/ 13 | 14 | from time import sleep 15 | 16 | import boto3 17 | import cfnresponse 18 | 19 | 20 | send, SUCCESS, FAILED = ( 21 | cfnresponse.send, 22 | cfnresponse.SUCCESS, 23 | cfnresponse.FAILED 24 | ) 25 | ecr = boto3.client('ecr') 26 | 27 | 28 | def wait(repo, until): 29 | until = until.lower() 30 | if until == "deleted": 31 | while True: 32 | try: 33 | sleep(1) 34 | ecr.describe_repositories(repositoryNames=[repo]) 35 | except ecr.exceptions.RepositoryNotFoundException: 36 | break 37 | 38 | if until == "exists": 39 | exists = False 40 | while not exists: 41 | try: 42 | sleep(1) 43 | exists = ecr.describe_repositories(repositoryNames=[repo])["repositories"] 44 | break 45 | except ecr.exceptions.RepositoryNotFoundException: 46 | exists = False 47 | 48 | 49 | 50 | def put_lifecycle_policy(repo, props): 51 | if props.get("LifecyclePolicy"): 52 | ecr.put_lifecycle_policy( 53 | repositoryName=repo, 54 | lifecyclePolicyText=props["LifecyclePolicy"]["LifecyclePolicyText"] 55 | ) 56 | 57 | 58 | def create(repo, props, event, context): 59 | # use existing repository if available, otherwise create 60 | try: 61 | ecr.create_repository(repositoryName=repo) 62 | wait(repo, "exists") 63 | put_lifecycle_policy(repo, props) 64 | 65 | except ecr.exceptions.RepositoryAlreadyExistsException: 66 | print(f"Repository '{repo}' already exists - CREATE ECR repository ignored") 67 | put_lifecycle_policy(repo, props) 68 | 69 | except Exception as e: 70 | send(event, context, FAILED, None) 71 | raise(e) 72 | 73 | 74 | def update(repo, props, event, context): 75 | # use existing repository if available 76 | update_policy = props.get("UpdateReplacePolicy") 77 | try: 78 | if update_policy and update_policy.lower() == "retain": 79 | put_lifecycle_policy(repo, props) 80 | else: 81 | # replace the repo 82 | delete(repo, props, event, context) 83 | create(repo, props, event, context) 84 | except Exception as e: 85 | send(event, context, FAILED, None) 86 | raise(e) 87 | 88 | 89 | def delete(repo, props, event, context): 90 | # retain repository if specified 91 | # otherwise force delete 92 | delete_policy = props.get("DeletePolicy") 93 | try: 94 | if delete_policy and not delete_policy.lower() == "retain": 95 | ecr.delete_repository(repositoryName=repo, force=True) 96 | wait(repo, "deleted") 97 | 98 | except Exception as e: 99 | send(event, context, FAILED, None) 100 | raise(e) 101 | 102 | 103 | def handler(event, context): 104 | props = event["ResourceProperties"] 105 | repo = props.get("RepositoryName") 106 | 107 | if event["RequestType"] in ("Create", "Update", "Delete"): 108 | action = globals()[event["RequestType"].lower()] 109 | action(repo, props, event, context) 110 | send(event, context, SUCCESS, None) 111 | else: 112 | # unhandled request type 113 | send(event, context, FAILED, None) -------------------------------------------------------------------------------- /src/lambda/ecr/requirements.txt: -------------------------------------------------------------------------------- 1 | #crhelper 2 | cfnresponse -------------------------------------------------------------------------------- /src/templates/README.md: -------------------------------------------------------------------------------- 1 | # Genomics Workflows on AWS CloudFormation templates 2 | 3 | Contained herein are CloudFormation templates for creating AWS resources for working with large-scale biomedical data - e.g. genomics. 
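As a point of reference, the core stack described in the next section can also be deployed directly with the AWS CLI once the templates and artifacts have been built (e.g. with `_scripts/make-dist.sh`) and uploaded to an S3 bucket. The following is only a sketch: the parameter names are the ones the CodePipeline stack in this repository passes to `gwfcore-root.template.yaml`; the bucket names, VPC, and subnet IDs are placeholders, and the root template may require additional parameters (such as a namespace), so check its `Parameters` section before deploying.

```shell
# Sketch only: assumes dist/ has been synced to s3://YOUR-ARTIFACT-BUCKET and that
# the stack is allowed to create IAM roles (hence the capability flags).
aws cloudformation create-stack \
    --stack-name gwfcore \
    --template-url https://YOUR-ARTIFACT-BUCKET.s3.amazonaws.com/templates/gwfcore/gwfcore-root.template.yaml \
    --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM \
    --parameters \
        ParameterKey=VpcId,ParameterValue=vpc-0123456789abcdef0 \
        'ParameterKey=SubnetIds,ParameterValue=subnet-aaaa1111\,subnet-bbbb2222' \
        ParameterKey=ArtifactBucketName,ParameterValue=YOUR-ARTIFACT-BUCKET \
        ParameterKey=TemplateRootUrl,ParameterValue=https://YOUR-ARTIFACT-BUCKET.s3.amazonaws.com/templates \
        ParameterKey=S3BucketName,ParameterValue=YOUR-DATA-BUCKET \
        ParameterKey=ExistingBucket,ParameterValue=No
```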
4 | 5 | ## Core Stack 6 | 7 | Templates in `gwfcore` are the "core" stack. The root template is: 8 | 9 | | File | Description | 10 | | :--- | :---------- | 11 | | `gwfcore-root.template.yaml` | Root stack that invokes nested stacks (see below) | 12 | 13 | Nested stacks are as follows and listed in order of creation: 14 | 15 | | File | Description | 16 | | :--- | :---------- | 17 | | `gwfcore-s3.template.yaml` | Creates an S3 bucket for storing installed artifacts and workflow input and output data | 18 | | `gwfcore-code.template.yaml` | Creates and installs code and artifacts used to run subsequent templates and provision EC2 instances | 19 | | `gwfcore-launch-template.template.yaml` | Creates an EC2 Launch Template used in AWS Batch Compute Environments | 20 | | `gwfcore-iam.template.yaml` | Creates IAM roles for AWS Batch resources | 21 | | `gwfcore-batch.template.yaml` | Creates AWS Batch Job Queues and Compute Environments for job execution | 22 | 23 | ## Optional Stacks 24 | | File | Description | 25 | | :--- | :---------- | 26 | | `gwfcore-fsx.template.yaml` | Creates an FSx for Lustre file system (Persistent 1 type only) mapped to the S3 bucket for storing workflow input, output and reference data. Refer to the Note section at the bottom. | 27 | | `gwfcore-efs.template.yaml` | Creates an EFS file system for storing workflow input, output and reference data | 28 | 29 | ## Orchestration Stacks 30 | 31 | The following stacks provide solutions that utilize: 32 | 33 | * AWS Step Functions 34 | * Cromwell 35 | * Nextflow 36 | 37 | They build atop the Core Stack above and provide the additional resources needed to run each orchestrator. 38 | 39 | | File | Description | 40 | | :--- | :---------- | 41 | | `step-functions/sfn-resources.template.yaml` | Creates an example AWS Step Functions state machine and containers for an example genomics workflow using BWA, samtools, and bcftools. | 42 | | `cromwell/cromwell-resources.template.yaml` | Creates an EC2 instance with Cromwell pre-installed and launched in "server" mode, plus an RDS Aurora Serverless database | 43 | | `nextflow/nextflow-resources.template.yaml` | Creates a Nextflow container and AWS Batch Job Definition for running Nextflow | 44 | 45 | 46 | Note: As Systems Manager Parameter Store is used, make sure to increase its throughput limit from the console. To do so, go to: 47 | AWS Systems Manager -> Parameter Store -> Settings -> Parameter Store throughput -> select the paid tier/higher throughput limit. -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-efs.template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: >- 4 | (WWPS-GLS-WF-GWFCORE-EFS) Creates an EFS file system and mount targets in a list of subnets 5 | 6 | Parameters: 7 | VpcId: 8 | Type: AWS::EC2::VPC::Id 9 | Description: 'The VPC to create security groups and deploy AWS Batch to. NOTE: Must be the same VPC as the provided subnet IDs.' 10 | SubnetIds: 11 | Type: List<AWS::EC2::Subnet::Id> 12 | Description: 'Subnets you want your batch compute environment to launch in. We recommend private subnets. NOTE: Must be from the VPC provided.' 13 | NumberOfSubnets: 14 | Type: Number 15 | Description: Number of subnets to launch into.
Should correspond to the length of Subnet Ids 16 | MinValue: 1 17 | MaxValue: 6 18 | 19 | Conditions: 20 | 2SubnetCondition: !Or 21 | - !Equals [!Ref 'NumberOfSubnets', '2'] 22 | - !Condition '3SubnetCondition' 23 | 3SubnetCondition: !Or 24 | - !Equals [!Ref 'NumberOfSubnets', '3'] 25 | - !Condition '4SubnetCondition' 26 | 4SubnetCondition: !Or 27 | - !Equals [!Ref 'NumberOfSubnets', '4'] 28 | - !Condition '5SubnetCondition' 29 | 5SubnetCondition: !Or 30 | - !Equals [!Ref 'NumberOfSubnets', '5'] 31 | - !Condition '6SubnetCondition' 32 | 6SubnetCondition: !Equals [!Ref NumberOfSubnets, '6'] 33 | 34 | Resources: 35 | SharedDataFileSystem: 36 | Type: AWS::EFS::FileSystem 37 | Properties: 38 | PerformanceMode: generalPurpose 39 | Encrypted: true 40 | FileSystemTags: 41 | - Key: Name 42 | Value: SharedDataGenomics 43 | 44 | MountTargetSecurityGroup: 45 | Type: AWS::EC2::SecurityGroup 46 | Properties: 47 | VpcId: !Ref VpcId 48 | GroupDescription: Security group for mount target 49 | SecurityGroupIngress: 50 | - IpProtocol: tcp 51 | FromPort: 2049 52 | ToPort: 2049 53 | CidrIp: 0.0.0.0/0 54 | 55 | MountTargetSubnet1: 56 | Type: AWS::EFS::MountTarget 57 | Properties: 58 | FileSystemId: !Ref SharedDataFileSystem 59 | SubnetId: !Select [0, !Ref SubnetIds] 60 | SecurityGroups: 61 | - !Ref MountTargetSecurityGroup 62 | 63 | MountTargetSubnet2: 64 | Type: AWS::EFS::MountTarget 65 | Condition: 2SubnetCondition 66 | Properties: 67 | FileSystemId: !Ref SharedDataFileSystem 68 | SubnetId: !Select [1, !Ref SubnetIds] 69 | SecurityGroups: 70 | - !Ref MountTargetSecurityGroup 71 | 72 | MountTargetSubnet3: 73 | Type: AWS::EFS::MountTarget 74 | Condition: 3SubnetCondition 75 | Properties: 76 | FileSystemId: !Ref SharedDataFileSystem 77 | SubnetId: !Select [2, !Ref SubnetIds] 78 | SecurityGroups: 79 | - !Ref MountTargetSecurityGroup 80 | 81 | MountTargetSubnet4: 82 | Type: AWS::EFS::MountTarget 83 | Condition: 4SubnetCondition 84 | Properties: 85 | FileSystemId: !Ref SharedDataFileSystem 86 | SubnetId: !Select [3, !Ref SubnetIds] 87 | SecurityGroups: 88 | - !Ref MountTargetSecurityGroup 89 | 90 | MountTargetSubnet5: 91 | Type: AWS::EFS::MountTarget 92 | Condition: 5SubnetCondition 93 | Properties: 94 | FileSystemId: !Ref SharedDataFileSystem 95 | SubnetId: !Select [4, !Ref SubnetIds] 96 | SecurityGroups: 97 | - !Ref MountTargetSecurityGroup 98 | 99 | MountTargetSubnet6: 100 | Type: AWS::EFS::MountTarget 101 | Condition: 6SubnetCondition 102 | Properties: 103 | FileSystemId: !Ref SharedDataFileSystem 104 | SubnetId: !Select [5, !Ref SubnetIds] 105 | SecurityGroups: 106 | - !Ref MountTargetSecurityGroup 107 | 108 | Outputs: 109 | EfsId: 110 | Value: !Ref SharedDataFileSystem 111 | Description: EFS ID 112 | 113 | -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-fsx.template.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: >- 3 | (WWPS-GLS-WF-GWFCORE-FSX) Creates FSx for Lustre file system 4 | 5 | Parameters: 6 | VpcId: 7 | Type: AWS::EC2::VPC::Id 8 | Description: The VPC to create security groups 9 | SubnetId: 10 | Type: String 11 | Description: "Subnet you want your FSx for lustre file system to launch in. Ensure Batch compute environment is also launched in that subnet only." 
12 | S3BucketName: 13 | Type: String 14 | AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" 15 | ConstraintDescription: "Must respect AWS naming conventions" 16 | Description: A S3 bucket name to mount on FSx 17 | FSxStorageType: 18 | Type: String 19 | Description: The type of FS needed i.e. SSD/HDD/SCRATCH, all capitals. 20 | Default: "SCRATCH" 21 | AllowedValues: 22 | - "SSD" 23 | - "HDD" 24 | - "SCRATCH" 25 | FSxStorageVolumeSize: 26 | Type: Number 27 | Default: 0 28 | Description: The initial size of the FSx volume to be used in GB. 0 will map to the minimum allowed size for this type of file system. 29 | FSxPerUnitStorageThroughput: 30 | Type: Number 31 | Default: 0 32 | Description: The throughput to be used for the storage, should be as provided, SSD - 50/100/200 mbps or HDD - 12/40. 0 will map to the minimum allowed throughput for this type of file system. 33 | 34 | Mappings: 35 | FSxTypeMap: 36 | SSD: 37 | DeploymentTypeString: "PERSISTENT_1" 38 | StorageTypeString: "SSD" 39 | MinThroughput: 50 40 | MinStorageCap: 1200 41 | HDD: 42 | DeploymentTypeString: "PERSISTENT_1" 43 | StorageTypeString: "HDD" 44 | MinThroughput: 12 45 | MinStorageCap: 6000 46 | SCRATCH: 47 | DeploymentTypeString: "SCRATCH_2" 48 | StorageTypeString: "SSD" 49 | MinThroughput: "NA" 50 | MinStorageCap: 1200 51 | 52 | TagMap: 53 | default: 54 | architecture: "genomics-workflows" 55 | solution: "default" 56 | tags: 57 | - Key: "architecture" 58 | Value: "genomics-workflows" 59 | - Key: "solution" 60 | Value: "default" 61 | 62 | Conditions: 63 | ScratchCheck: !Equals [!Ref FSxStorageType, "SCRATCH"] 64 | TypeCheck: !Or [!Equals [!Ref FSxStorageType, "SCRATCH"], !Equals [!Ref FSxStorageType, "SSD"]] 65 | IsMinThroughput: !Equals [!Ref FSxPerUnitStorageThroughput, 0] 66 | IsMinStorageCapacity: !Equals [!Ref FSxStorageVolumeSize, 0] 67 | 68 | 69 | Resources: 70 | FSxSecurityGroup: 71 | Type: AWS::EC2::SecurityGroup 72 | Properties: 73 | GroupDescription: SG for FSx 74 | VpcId: 75 | Ref: VpcId 76 | Tags: 77 | - Key: Application 78 | Value: AWS-GENOMICS-WKF 79 | 80 | SGIngressTCP988: 81 | Type: AWS::EC2::SecurityGroupIngress 82 | Properties: 83 | Description: "Allow TCP Connections for this security group" 84 | GroupId: !Ref FSxSecurityGroup 85 | SourceSecurityGroupId: !Ref FSxSecurityGroup 86 | IpProtocol: tcp 87 | FromPort: 988 88 | ToPort: 988 89 | 90 | SGIngressTCP1021: 91 | Type: AWS::EC2::SecurityGroupIngress 92 | Properties: 93 | Description: "Allow TCP Connections for this security group" 94 | GroupId: !Ref FSxSecurityGroup 95 | SourceSecurityGroupId: !Ref FSxSecurityGroup 96 | IpProtocol: tcp 97 | FromPort: 1021 98 | ToPort: 1023 99 | 100 | FSxFileSystem: 101 | Type: AWS::FSx::FileSystem 102 | Properties: 103 | FileSystemType: "LUSTRE" 104 | LustreConfiguration: 105 | AutoImportPolicy: "NEW_CHANGED" 106 | DeploymentType: !FindInMap [FSxTypeMap, !Ref FSxStorageType, DeploymentTypeString] 107 | DriveCacheType: 108 | Fn::If: 109 | - TypeCheck 110 | - !Ref AWS::NoValue 111 | - "NONE" 112 | ExportPath: !Sub s3://${S3BucketName} 113 | ImportPath: !Sub s3://${S3BucketName} 114 | PerUnitStorageThroughput: 115 | Fn::If: 116 | - ScratchCheck 117 | - !Ref AWS::NoValue 118 | - Fn::If: 119 | - IsMinThroughput 120 | - !FindInMap [FSxTypeMap, !Ref FSxStorageType, MinThroughput] 121 | - !Ref FSxPerUnitStorageThroughput 122 | SecurityGroupIds: 123 | - !Ref FSxSecurityGroup 124 | StorageCapacity: 125 | Fn::If: 126 | - 
IsMinStorageCapacity 127 | - !FindInMap [FSxTypeMap, !Ref FSxStorageType, MinStorageCap] 128 | - !Ref FSxStorageVolumeSize 129 | StorageType: !FindInMap [FSxTypeMap, !Ref FSxStorageType, StorageTypeString] 130 | SubnetIds: [!Ref SubnetId] 131 | Tags: 132 | - Key: architecture 133 | Value: !FindInMap ["TagMap", "default", "architecture"] 134 | 135 | Outputs: 136 | FSxId: 137 | Value: !Ref FSxFileSystem 138 | Description: FSx ID 139 | FSxMount: 140 | Value: !GetAtt FSxFileSystem.LustreMountName 141 | Description: FSx Mount Name 142 | FSxSecurityGroupId: 143 | Description: The FSx Security Group 144 | Value: !Ref FSxSecurityGroup -------------------------------------------------------------------------------- /src/templates/gwfcore/gwfcore-s3.template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | AWSTemplateFormatVersion: 2010-09-09 3 | Description: >- 4 | (WWPS-GLS-WF-GWFCORE-S3) A S3 bucket for storing results from genomics analysis 5 | 6 | Mappings: 7 | TagMap: 8 | default: 9 | architecture: "genomics-workflows" 10 | solution: "default" 11 | tags: 12 | - Key: "architecture" 13 | Value: "genomics-workflows" 14 | - Key: "solution" 15 | Value: "default" 16 | 17 | Parameters: 18 | Namespace: 19 | Type: String 20 | Description: Namespace (e.g. project name) to use to label resources 21 | 22 | S3BucketName: 23 | Type: String 24 | Description: >- 25 | A S3 bucket name for storing analysis results. 26 | The bucket name must respect the S3 bucket naming conventions 27 | (can contain lowercase letters, numbers, periods and hyphens). 28 | If left blank a unique bucket name will be generated. 29 | AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" 30 | ConstraintDescription: "Must respect AWS naming conventions" 31 | ExistingBucket: 32 | Type: String 33 | Description: Does this bucket already exist? If not, it will be created. 34 | AllowedValues: 35 | - "Yes" 36 | - "No" 37 | Default: "No" 38 | 39 | Conditions: 40 | BucketDoesNotExist: !Equals [ !Ref ExistingBucket, "No" ] 41 | GenerateBucketName: !Equals [ !Ref S3BucketName, "" ] 42 | 43 | Resources: 44 | S3Bucket: 45 | Type: AWS::S3::Bucket 46 | Condition: BucketDoesNotExist 47 | DeletionPolicy: Retain 48 | UpdateReplacePolicy: Retain 49 | Properties: 50 | BucketName: 51 | Fn::If: 52 | - GenerateBucketName 53 | - !Sub gwfcore-${Namespace} 54 | - !Ref S3BucketName 55 | BucketEncryption: 56 | ServerSideEncryptionConfiguration: 57 | - ServerSideEncryptionByDefault: 58 | SSEAlgorithm: AES256 59 | Tags: 60 | - Key: architecture 61 | Value: !FindInMap ["TagMap", "default", "architecture"] 62 | 63 | Outputs: 64 | BucketName: 65 | Value: 66 | Fn::If: 67 | - BucketDoesNotExist 68 | - !Ref S3Bucket 69 | - !Ref S3BucketName 70 | BucketArn: 71 | Value: 72 | Fn::If: 73 | - BucketDoesNotExist 74 | - !GetAtt S3Bucket.Arn 75 | - !Sub arn:aws:s3:::${S3BucketName} 76 | ... 77 | --------------------------------------------------------------------------------