├── .gitignore ├── ATTRIBUTION.txt ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── deployment ├── build-s3-dist.sh ├── copy-static-files.sh ├── genomics-tertiary-analysis-and-data-lakes-using-aws-glue-and-amazon-athena.template ├── guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws.template └── run-unit-tests.sh ├── multi-omics.code-workspace └── source ├── GenomicsAnalysisCode ├── TCIA_etl.yaml ├── code_cfn.yml ├── copyresources_buildspec.yml ├── omics_cfn.yml ├── omicsresources_buildspec.yml ├── quicksight_cfn.yml ├── resources │ ├── notebooks │ │ ├── cohort-building.ipynb │ │ ├── runbook.ipynb │ │ └── summarize-tcga-datasets.ipynb │ ├── omics │ │ ├── create_annotation_store_lambda.py │ │ ├── create_reference_store_lambda.py │ │ ├── create_variant_store_lambda.py │ │ ├── import_annotation_lambda.py │ │ ├── import_reference_lambda.py │ │ └── import_variant_lambda.py │ └── scripts │ │ ├── create_tcga_summary.py │ │ ├── image_api_glue.py │ │ ├── run_tests.py │ │ ├── tcga_etl_common_job.py │ │ └── transfer_tcia_images_glue.py ├── run_crawlers.sh └── setup │ ├── lambda.py │ └── requirements.txt ├── GenomicsAnalysisPipe └── pipe_cfn.yml ├── GenomicsAnalysisZone └── zone_cfn.yml ├── TCIA_etl.yaml ├── setup.sh ├── setup_cfn.yml └── teardown.sh /.gitignore: -------------------------------------------------------------------------------- 1 | ./source/GenomicsAnalysisCode/setup/crhelper* 2 | ./source/GenomicsAnalysisCode/setup/tests* 3 | multi-omics.code-workspace 4 | bundle 5 | deployment/global-s3-assets 6 | deployment/regional-s3-assets 7 | source/GenomicsAnalysisCode/setup 8 | -------------------------------------------------------------------------------- /ATTRIBUTION.txt: -------------------------------------------------------------------------------- 1 | The Genomics Tertiary Analysis and Data Lakes Product includes the following third-party software/licensing: 2 | 3 | License file for pydicom, a pure-python DICOM library 4 | 5 | Copyright (c) 2008-2020 Darcy Mason and pydicom contributors 6 | 7 | Except for portions outlined below, pydicom is released under an MIT license: 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | 27 | Portions of pydicom (private dictionary file(s)) were generated from the 28 | private dictionary of the GDCM library, released under the following license: 29 | 30 | Program: GDCM (Grassroots DICOM). 
A DICOM library 31 | Module: http://gdcm.sourceforge.net/Copyright.html 32 | 33 | Copyright (c) 2006-2010 Mathieu Malaterre 34 | Copyright (c) 1993-2005 CREATIS 35 | (CREATIS = Centre de Recherche et d'Applications en Traitement de l'Image) 36 | All rights reserved. 37 | 38 | Redistribution and use in source and binary forms, with or without 39 | modification, are permitted provided that the following conditions are met: 40 | 41 | * Redistributions of source code must retain the above copyright notice, 42 | this list of conditions and the following disclaimer. 43 | 44 | * Redistributions in binary form must reproduce the above copyright notice, 45 | this list of conditions and the following disclaimer in the documentation 46 | and/or other materials provided with the distribution. 47 | 48 | * Neither name of Mathieu Malaterre, or CREATIS, nor the names of any 49 | contributors (CNRS, INSERM, UCB, Universite Lyon I), may be used to 50 | endorse or promote products derived from this software without specific 51 | prior written permission. 52 | 53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 54 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 57 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 59 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 60 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 61 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 62 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [1.0.0] - 2020-07-07 8 | ### Added 9 | - initial release 10 | 11 | ## [1.0.1] - 2020-08-20 12 | ### Added 13 | - Removed "Admin" as a key administrator from data catalog encryption key so an "Admin" role is not required to exist in the account for the solution to install and work. 14 | 15 | ## [2.0.0] - 2022-06-20 16 | ### Added 17 | - Added guidance for multi-omics multi-modal analysis using The Cancer Genome Atlas (TCGA) and The Cancer Genome Imaging Atlas (TCIA) 18 | 19 | ## [3.0.0] - 2023-01-10 20 | ### Added 21 | - Added guidance on working with a Reference Store, Variant Store & Annotation Store in Amazon Omics in a multi-modal context. 
22 | - Replaced ETL pipelines for Genomics data (1k, clinvar and example VCF) with Amazon Omics 23 | 24 | ## [3.0.0] - 2023-04-28 25 | ### Added 26 | - Update bucket creation steps to comply with https://aws.amazon.com/about-aws/whats-new/2023/04/amazon-s3-two-security-best-practices-buckets-default/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/genomics-analysis/issues), or [recently closed](https://github.com/awslabs/genomics-analysis/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 
41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/genomics-analysis/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/genomics-analysis/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Genomics Tertiary Analysis and Data Lakes Using AWS Glue and Amazon Athena 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License Version 2.0 (the "License"). You may not use this file except 4 | in compliance with the License. A copy of the License is located at http://www.apache.org/licenses/ 5 | or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 6 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the 7 | specific language governing permissions and limitations under the License. 8 | 9 | ********************** 10 | THIRD PARTY COMPONENTS 11 | ********************** 12 | This software includes third party software subject to the following copyrights: 13 | 14 | AWS SDK under the Apache License Version 2.0 15 | AWS Custom Resource Helper under the Apache License Version 2.0 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Guidance for Multi-Omics and Multi-Modal Data Integration and Analysis on AWS 2 | This guidance creates a scalable environment in AWS to prepare genomic, clinical, mutation, expression and imaging data for large-scale analysis and perform interactive queries against a data lake. 
This solution demonstrates how to 1) build, package, and deploy libraries used for genomics data conversion, 2) provision serverless data ingestion pipelines for multi-modal data preparation and cataloging, 3) visualize and explore clinical data through an interactive interface, and 4) run interactive analytic queries against a multi-modal data lake. This solution also demonstrates how to use Amazon Omics to create and work with a Sequence Store, Reference Store and Variant Store in a multi-modal context. 3 | 4 | # Setup 5 | You can set up the solution in your account by clicking the "Deploy sample code on Console" button on the [solution home page](https://aws.amazon.com/solutions/guidance/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis/). 6 | 7 | # Customization 8 | 9 | ## Running unit tests for customization 10 | * Clone the repository, then make the desired code changes 11 | * Next, run the unit tests to make sure the added customization passes the tests 12 | ``` 13 | cd ./deployment 14 | chmod +x ./run-unit-tests.sh 15 | ./run-unit-tests.sh 16 | ``` 17 | 18 | ## Prerequisites 19 | 20 | 1. Create a distribution bucket, e.g., my-bucket-name 21 | 2. Create a region-specific distribution bucket, e.g., my-bucket-name-us-west-2 22 | 3. Create a Cloud9 environment. 23 | 4. Clone this repo into that environment. 24 | 25 | ## Building and deploying the distributable for customization 26 | Configure the bucket name and region of your target Amazon S3 distribution bucket and run the following statements. 27 | 28 | ``` 29 | _Note:_ The commands below expect an S3 bucket named 'my-bucket-name-<aws_region>' to exist (see Prerequisites); aws_region is the region where you are testing the customized solution. 30 | ``` 31 | 32 | ``` 33 | #bucket where customized code will reside (base name only, without the '-<region>' suffix; the region is appended in the commands below) 34 | export DIST_OUTPUT_BUCKET=my-bucket-name 35 | 36 | #default region where resources will get created 37 | #Use "us-east-1" to get publicly available data from the AWS solution bucket 38 | export REGION=my-region 39 | 40 | #default name of the solution (use this name to get publicly available test datasets from the AWS S3 bucket) 41 | export SOLUTION_NAME=genomics-tertiary-analysis-and-data-lakes-using-aws-glue-and-amazon-athena 42 | 43 | #version number for the customized code (use this version to get publicly available test datasets from the AWS S3 bucket) 44 | export VERSION=latest 45 | ``` 46 | 47 | #### Change to the deployment directory. 48 | ``` 49 | cd deployment 50 | ``` 51 | 52 | #### Build the distributable. 53 | ``` 54 | chmod +x ./build-s3-dist.sh 55 | ./build-s3-dist.sh $DIST_OUTPUT_BUCKET $SOLUTION_NAME $VERSION 56 | ``` 57 | 58 | #### Deploy the distributable to an Amazon S3 bucket in your account. _Note:_ You must have the AWS Command Line Interface installed. 59 | ``` 60 | aws s3 cp ./$SOLUTION_NAME.template s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/ 61 | ``` 62 | 63 | #### Deploy the global assets. 64 | 65 | ``` 66 | aws s3 cp ./global-s3-assets/ s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION --recursive 67 | ``` 68 | 69 | #### Deploy the regional assets. 70 | 71 | ``` 72 | aws s3 cp ./regional-s3-assets/ s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION --recursive 73 | ``` 74 | 75 | #### Copy the static assets. 76 | 77 | ``` 78 | ./copy-static-files.sh [Optional]AWSProfile 79 | ``` 80 | 81 | #### Go to the DIST_OUTPUT_BUCKET and copy the OBJECT URL for latest/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws.template.
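
If you prefer the AWS CLI to the AWS CloudFormation console for stack creation, the copied object URL can also be passed to `aws cloudformation create-stack`. The snippet below is a minimal sketch, not part of the solution's own scripts: it assumes the environment variables exported above are still set, that the global assets were uploaded under the `$SOLUTION_NAME/$VERSION/` prefix as in the steps above, and that the stack name `multi-omics-guidance` is just an example. `CAPABILITY_IAM` is required because the template creates IAM roles, and the `Project` parameter shown is simply the template's default.

```
# Hypothetical CLI alternative: construct the object URL for the uploaded template,
# then create the stack without using the console.
TEMPLATE_URL="https://$DIST_OUTPUT_BUCKET-$REGION.s3.$REGION.amazonaws.com/$SOLUTION_NAME/$VERSION/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws.template"

aws cloudformation create-stack \
  --stack-name multi-omics-guidance \
  --template-url "$TEMPLATE_URL" \
  --parameters ParameterKey=Project,ParameterValue=GenomicsAnalysis \
  --capabilities CAPABILITY_IAM \
  --region $REGION
```

Stack progress can then be followed with `aws cloudformation describe-stacks --stack-name multi-omics-guidance`, or in the console as described in the next step.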
82 | 83 | #### Go to the AWS CloudFormation Console and create a new stack using the template URL copied. 84 | 85 | # File Structure 86 | The overall file structure for the application. 87 | 88 | ``` 89 | . 90 | ├── ATTRIBUTION.txt 91 | ├── CHANGELOG.md 92 | ├── CODE_OF_CONDUCT.md 93 | ├── CONTRIBUTING.md 94 | ├── LICENSE.txt 95 | ├── NOTICE.txt 96 | ├── README.md 97 | ├── buildspec.yml 98 | ├── deploy.sh 99 | ├── deployment 100 | │ ├── build-s3-dist.sh 101 | │── source 102 | │ ├── GenomicsAnalysisCode 103 | │ │ ├── TCIA_etl.yaml 104 | │ │ ├── code_cfn.yml 105 | │ │ ├── copyresources_buildspec.yml 106 | │ │ ├── omics_cfn.yml 107 | │ │ ├── omicsresources_buildspec.yml 108 | │ │ ├── quicksight_cfn.yml 109 | │ │ ├── resources 110 | │ │ │ ├── notebooks 111 | │ │ │ │ ├── cohort-building.ipynb 112 | │ │ │ │ ├── runbook.ipynb 113 | │ │ │ │ └── summarize-tcga-datasets.ipynb 114 | │ │ │ ├── omics 115 | │ │ │ │ ├── create_annotation_store_lambda.py 116 | │ │ │ │ ├── create_reference_store_lambda.py 117 | │ │ │ │ ├── create_variant_store_lambda.py 118 | │ │ │ │ ├── import_annotation_lambda.py 119 | │ │ │ │ ├── import_reference_lambda.py 120 | │ │ │ │ └── import_variant_lambda.py 121 | │ │ │ └── scripts 122 | │ │ │ ├── create_tcga_summary.py 123 | │ │ │ ├── image_api_glue.py 124 | │ │ │ ├── run_tests.py 125 | │ │ │ ├── tcga_etl_common_job.py 126 | │ │ │ └── transfer_tcia_images_glue.py 127 | │ │ ├── run_crawlers.sh 128 | │ │ └── setup 129 | │ │ ├── lambda.py 130 | │ │ └── requirements.txt 131 | │ ├── GenomicsAnalysisPipe 132 | │ │ └── pipe_cfn.yml 133 | │ ├── GenomicsAnalysisZone 134 | │ │ └── zone_cfn.yml 135 | │ ├── TCIA_etl.yaml 136 | │ ├── setup.sh 137 | │ ├── setup_cfn.yml 138 | │ └── teardown.sh 139 | ├── template_cfn.yml 140 | ``` 141 | 142 | *** 143 | 144 | This solution collects anonymous operational metrics to help AWS improve the 145 | quality of features of the solution. For more information, including how to disable 146 | this capability, please see the [implementation guide](https://docs.aws.amazon.com/solutions/latest/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws/appendix-i.html). 147 | 148 | --- 149 | 150 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 151 | 152 | Licensed under the Apache License Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at 153 | 154 | http://www.apache.org/licenses/ 155 | 156 | or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License. 157 | -------------------------------------------------------------------------------- /deployment/build-s3-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This assumes all of the OS-level configuration has been completed and git repo has already been cloned 4 | # 5 | # This script should be run from the repo's deployment directory 6 | # cd deployment 7 | # ./build-s3-dist.sh source-bucket-base-name solution-name version-code 8 | # 9 | # Paramenters: 10 | # - source-bucket-base-name: Name for the S3 bucket location where the template will source the Lambda 11 | # code from. The template will append '-[region_name]' to this bucket name. 
12 | # For example: ./build-s3-dist.sh solutions my-solution v1.0.0 13 | # The template will then expect the source code to be located in the solutions-[region_name] bucket 14 | # 15 | # - solution-name: name of the solution for consistency 16 | # 17 | # - version-code: version of the package 18 | 19 | # Check to see if input has been provided: 20 | if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then 21 | echo "Please provide the base source bucket name, trademark approved solution name and version where the lambda code will eventually reside." 22 | echo "For example: ./build-s3-dist.sh solutions trademarked-solution-name v1.0.0" 23 | exit 1 24 | fi 25 | 26 | # Get reference for all important folders 27 | template_dir="$PWD" 28 | template_dist_dir="$template_dir/global-s3-assets" 29 | build_dist_dir="$template_dir/regional-s3-assets" 30 | source_dir="$template_dir/../source" 31 | 32 | cp $source_dir/setup_cfn.yml $template_dir/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws.template 33 | cp $source_dir/setup_cfn.yml $template_dir/$SOLUTION_NAME.template 34 | 35 | echo "------------------------------------------------------------------------------" 36 | echo "[Init] Clean old dist" 37 | echo "------------------------------------------------------------------------------" 38 | echo "rm -rf $template_dist_dir" 39 | rm -rf $template_dist_dir 40 | echo "mkdir -p $template_dist_dir" 41 | mkdir -p $template_dist_dir 42 | #echo "rm -rf $build_dist_dir" 43 | #rm -rf $build_dist_dir 44 | echo "mkdir -p $build_dist_dir" 45 | mkdir -p $build_dist_dir 46 | 47 | echo "------------------------------------------------------------------------------" 48 | echo "[Packing] Templates" 49 | echo "------------------------------------------------------------------------------" 50 | echo "cp $template_dir/*.template $template_dist_dir/" 51 | cp $template_dir/*.template $template_dist_dir/ 52 | echo "copy yaml templates and rename" 53 | #cp $template_dir/*.yml $template_dist_dir/ 54 | #cd $template_dist_dir 55 | # Rename all *.yaml to *.template 56 | #for f in *.yml; do 57 | # mv -- "$f" "${f%.yml}.template" 58 | #done 59 | 60 | #cd .. 61 | echo "Updating code source bucket in template with $1" 62 | replace="s/%%BUCKET_NAME%%/$1/g" 63 | echo "sed -i '' -e $replace $template_dist_dir/*.template" 64 | sed -i '' -e $replace $template_dist_dir/*.template 65 | replace="s/%%SOLUTION_NAME%%/$2/g" 66 | echo "sed -i '' -e $replace $template_dist_dir/*.template" 67 | sed -i '' -e $replace $template_dist_dir/*.template 68 | replace="s/%%VERSION%%/$3/g" 69 | echo "sed -i '' -e $replace $template_dist_dir/*.template" 70 | sed -i '' -e $replace $template_dist_dir/*.template 71 | 72 | mkdir -p $build_dist_dir/references/hg38 73 | mkdir -p $build_dist_dir/variants/1kg/ 74 | mkdir $build_dist_dir/variants/vcf/ 75 | mkdir -p $build_dist_dir/annotation/clinvar/ 76 | mkdir $build_dist_dir/tcga/ 77 | 78 | echo "------------------------------------------------------------------------------" 79 | echo "[Rebuild] Solution" 80 | echo "------------------------------------------------------------------------------" 81 | 82 | cd $source_dir 83 | 84 | bundle_dir="$source_dir/../bundle" 85 | mkdir -p $bundle_dir 86 | 87 | # create the lambda function deployment pacakage for the solution setup 88 | cd $source_dir/GenomicsAnalysisCode/setup 89 | pip install -t . crhelper 90 | zip -r $bundle_dir/SolutionSetup.zip . 91 | 92 | # package the solution 93 | cd $source_dir 94 | zip -r $bundle_dir/Solution.zip . 
95 | 96 | # package new lambdas here 97 | 98 | # upload zips here 99 | 100 | cd $bundle_dir 101 | cp Solution.zip $template_dist_dir/ 102 | cp SolutionSetup.zip $template_dist_dir/ 103 | cp Solution.zip $build_dist_dir/ 104 | cp SolutionSetup.zip $build_dist_dir/ 105 | -------------------------------------------------------------------------------- /deployment/copy-static-files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | AWS_PROFILE=$1 4 | 5 | # use profile if 2nd argument provided 6 | if [ $# -eq 1 ] 7 | then 8 | AWS_PROFILE=" --profile ${1}" 9 | fi 10 | 11 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/annotation/clinvar/clinvar.vcf.gz s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/annotation/clinvar/clinvar.vcf.gz --copy-props none $AWS_PROFILE 12 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/variants/vcf/variants.vcf.gz s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/variants/vcf/variants.vcf.gz --copy-props none $AWS_PROFILE 13 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/references/hg38/Homo_sapiens_assembly38.fasta s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/references/hg38/Homo_sapiens_assembly38.fasta --copy-props none $AWS_PROFILE 14 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz --copy-props none $AWS_PROFILE 15 | 16 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcga-clinical.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcga-clinical.zip --copy-props none $AWS_PROFILE 17 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcga-cnv.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcga-cnv.zip --copy-props none $AWS_PROFILE 18 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcga-expression.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcga-expression.zip --copy-props none $AWS_PROFILE 19 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcga-mutation.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcga-mutation.zip --copy-props none $AWS_PROFILE 20 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcia-metadata.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcia-metadata.zip --copy-props none $AWS_PROFILE 21 | aws s3 cp s3://solutions-$REGION/$SOLUTION_NAME/v3.0.0/tcga/tcga-summary.zip s3://$DIST_OUTPUT_BUCKET-$REGION/$SOLUTION_NAME/$VERSION/tcga/tcga-summary.zip --copy-props none $AWS_PROFILE 22 | 23 | -------------------------------------------------------------------------------- /deployment/genomics-tertiary-analysis-and-data-lakes-using-aws-glue-and-amazon-athena.template: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | 3 | Description: | 4 | (SO0077) - This Guidance for Multi-Omics and Multi-Modal Data Integration and Analysis on AWS creates a scalable environment in AWS to prepare genomic, clinical, mutation, expression and imaging data for large-scale analysis and perform interactive queries against a data lake. 
This solution demonstrates how to 1)Provision Amazon Omics resources to ingest, store and query genomics data, 2) provision serverless data ingestion pipelines for multi-modal data preparation and cataloging, 3) visualize and explore clinical data through an interactive interface, and 4) run interactive analytic queries against a multi-modal data lake. Version v3.0.0. 5 | 6 | Mappings: 7 | Send: 8 | AnonymousUsage: 9 | Data: Yes 10 | SourceCode: 11 | General: 12 | S3Bucket: '%%BUCKET_NAME%%' 13 | KeyPrefix: '%%SOLUTION_NAME%%/%%VERSION%%' 14 | 15 | Parameters: 16 | Project: 17 | Type: String 18 | Description: > 19 | The project name for this solution. The project name will be used to prefix resources created by this solution. The solution Glue database name and Athena workgroup name will be the project name. Project names should be unique to a project. 20 | AllowedPattern: "[a-zA-Z0-9-]{3,24}" 21 | ConstraintDescription: > 22 | Project name should be unique, 3-24 characters in length, and only have alphanumeric characters and hyphens ([a-zA-Z0-9-]{3,32}). 23 | Default: GenomicsAnalysis 24 | 25 | Resources: 26 | Setup: 27 | Type: Custom::Setup 28 | DependsOn: 29 | - CodeBuild 30 | Version: 1.0 31 | Properties: 32 | ServiceToken: !Sub ${SetupLambda.Arn} 33 | CodeBuildProjectName: !Sub ${CodeBuild} 34 | 35 | SetupLambda: 36 | Type: AWS::Lambda::Function 37 | DependsOn: 38 | - SetupLambdaRole 39 | Properties: 40 | Handler: lambda.handler 41 | Runtime: python3.8 42 | FunctionName: !Sub ${Project}Setup 43 | Code: 44 | S3Bucket: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 45 | S3Key: !Join ["", [!FindInMap ["SourceCode", "General", "KeyPrefix"], "/SolutionSetup.zip"]] 46 | Role: !Sub ${SetupLambdaRole.Arn} 47 | Timeout: 600 48 | Metadata: 49 | cfn_nag: 50 | rules_to_suppress: 51 | - id: W58 52 | reason: Bug in CfnNag. 53 | - id: W89 54 | reason: Lambda only used on setup. 55 | - id: W92 56 | reason: No need for concurrent execution. 
57 | SetupLambdaRole: 58 | Type: AWS::IAM::Role 59 | DependsOn: 60 | - CodeBuild 61 | Properties: 62 | AssumeRolePolicyDocument: 63 | Version: 2012-10-17 64 | Statement: 65 | - Action: 66 | - sts:AssumeRole 67 | Effect: Allow 68 | Principal: 69 | Service: 70 | - lambda.amazonaws.com 71 | Path: / 72 | Policies: 73 | - PolicyName: LogsAccess 74 | PolicyDocument: 75 | Statement: 76 | - Effect: Allow 77 | Action: 78 | - logs:CreateLogGroup 79 | - logs:CreateLogStream 80 | - logs:PutLogEvents 81 | Resource: 82 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${Project}* 83 | - PolicyName: CodeBuildAccess 84 | PolicyDocument: 85 | Statement: 86 | - Effect: Allow 87 | Action: 88 | - codebuild:BatchGetProjects 89 | - codebuild:BatchGetBuilds 90 | - codebuild:StartBuild 91 | Resource: 92 | - !Sub ${CodeBuild.Arn} 93 | - PolicyName: EventsAccess 94 | PolicyDocument: 95 | Statement: 96 | - Effect: Allow 97 | Action: 98 | - events:DeleteRule 99 | - events:PutRule 100 | - events:PutTargets 101 | - events:RemoveTargets 102 | Resource: 103 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/Setup* 104 | - PolicyName: LambdaAccess 105 | PolicyDocument: 106 | Statement: 107 | - Effect: Allow 108 | Action: 109 | - lambda:AddPermission 110 | - lambda:RemovePermission 111 | Resource: 112 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 113 | 114 | CodeBuildRole: 115 | Type: AWS::IAM::Role 116 | Properties: 117 | AssumeRolePolicyDocument: 118 | Version: 2012-10-17 119 | Statement: 120 | - Action: 121 | - sts:AssumeRole 122 | Effect: Allow 123 | Principal: 124 | Service: 125 | - codebuild.amazonaws.com 126 | Path: / 127 | Policies: 128 | - PolicyName: CloudFormationAccess 129 | PolicyDocument: 130 | Statement: 131 | - Action: 132 | - cloudformation:CreateStack 133 | - cloudformation:DescribeStacks 134 | - cloudformation:DescribeStackResource 135 | - cloudformation:DescribeStackResources 136 | - cloudformation:UpdateStack 137 | - cloudformation:DeleteStack 138 | - cloudformation:UpdateTerminationProtection 139 | Effect: Allow 140 | Resource: !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${Project}* 141 | - PolicyName: LogsAccess 142 | PolicyDocument: 143 | Statement: 144 | - Effect: Allow 145 | Action: 146 | - logs:CreateLogGroup 147 | - logs:CreateLogStream 148 | - logs:PutLogEvents 149 | Resource: 150 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/${Project}* 151 | - PolicyName: IAMAccess 152 | PolicyDocument: 153 | Statement: 154 | - Effect: Allow 155 | Action: 156 | - iam:CreateRole 157 | - iam:DeleteRole 158 | - iam:PutRolePolicy 159 | - iam:DeleteRolePolicy 160 | - iam:AttachRolePolicy 161 | - iam:DetachRolePolicy 162 | - iam:UpdateAssumeRolePolicy 163 | - iam:PassRole 164 | - iam:GetRole 165 | - iam:GetInstanceProfile 166 | - iam:CreateInstanceProfile 167 | - iam:DeleteInstanceProfile 168 | - iam:AddRoleToInstanceProfile 169 | - iam:RemoveRoleFromInstanceProfile 170 | Resource: 171 | - !Sub arn:aws:iam::${AWS::AccountId}:role/${Project}* 172 | - !Sub arn:aws:iam::${AWS::AccountId}:instance-profile/${Project}* 173 | - !Sub arn:aws:iam::${AWS::AccountId}:role/aws-quicksight-service-role-v0 174 | - PolicyName: CodeBuildAccess 175 | PolicyDocument: 176 | Statement: 177 | - Effect: Allow 178 | Action: 179 | - codebuild:CreateProject 180 | - codebuild:UpdateProject 181 | - codebuild:ListProjects 182 | - codebuild:BatchGetProjects 183 | - codebuild:DeleteProject 184 | Resource: 185 | - !Sub 
arn:aws:codebuild:${AWS::Region}:${AWS::AccountId}:project/${Project}* 186 | - PolicyName: CodePipelineAccess 187 | PolicyDocument: 188 | Statement: 189 | - Effect: Allow 190 | Action: 191 | - codepipeline:CreatePipeline 192 | - codepipeline:GetPipeline 193 | - codepipeline:UpdatePipeline 194 | - codepipeline:DeletePipeline 195 | - codepipeline:GetPipelineState 196 | - codepipeline:ListPipelineExecutions 197 | Resource: 198 | - !Sub arn:aws:codepipeline:${AWS::Region}:${AWS::AccountId}:${Project}* 199 | - PolicyName: CodeCommitAccess 200 | PolicyDocument: 201 | Statement: 202 | - Effect: Allow 203 | Action: 204 | - codecommit:CreateBranch 205 | - codecommit:CreateRepository 206 | - codecommit:GetRepository 207 | - codecommit:DeleteRepository 208 | - codecommit:CreateCommit 209 | - codecommit:GitPush 210 | - codecommit:GitPull 211 | - codecommit:DeleteBranch 212 | Resource: 213 | - !Sub arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:${Project}* 214 | - Effect: Allow 215 | Action: 216 | - codecommit:ListRepositories 217 | Resource: '*' 218 | - PolicyName: EventsAccess 219 | PolicyDocument: 220 | Statement: 221 | - Effect: Allow 222 | Action: 223 | - events:DescribeRule 224 | - events:PutRule 225 | - events:DeleteRule 226 | - events:PutTargets 227 | - events:RemoveTargets 228 | Resource: 229 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/* 230 | - PolicyName: GlueAccess 231 | PolicyDocument: 232 | Statement: 233 | - Effect: Allow 234 | Action: 235 | - glue:StartCrawler 236 | - glue:GetCrawlers 237 | - glue:StartWorkflowRun 238 | Resource: '*' 239 | - PolicyName: LambdaAccess 240 | PolicyDocument: 241 | Statement: 242 | - Effect: Allow 243 | Action: 244 | - lambda:GetFunction 245 | - lambda:CreateFunction 246 | - lambda:DeleteFunction 247 | - lambda:InvokeFunction 248 | Resource: 249 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 250 | - PolicyName: S3Access 251 | PolicyDocument: 252 | Statement: 253 | - Effect: Allow 254 | Action: 255 | - s3:GetObject 256 | Resource: 257 | !Join 258 | - '' 259 | - - 'arn:aws:s3:::' 260 | - !Join 261 | - '-' 262 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 263 | - Ref: "AWS::Region" 264 | - '/*' 265 | - Effect: Allow 266 | Action: 267 | - s3:ListBucket 268 | Resource: 269 | !Join 270 | - '' 271 | - - 'arn:aws:s3:::' 272 | - !Join 273 | - '-' 274 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 275 | - Ref: "AWS::Region" 276 | 277 | - Effect: Allow 278 | Action: 279 | - s3:PutObjectAcl 280 | - s3:GetObject 281 | - s3:PutObject 282 | - s3:DeleteObject 283 | - s3:ListBucket 284 | - s3:CreateBucket 285 | - s3:DeleteBucket 286 | - s3:PutEncryptionConfiguration 287 | - s3:PutBucketPublicAccessBlock 288 | - s3:PutBucketLogging 289 | - s3:PutBucketAcl 290 | - s3:PutBucketOwnershipControls 291 | Resource: 292 | - arn:aws:s3:::*pipe* 293 | - arn:aws:s3:::*pipe*/* 294 | - Effect: Allow 295 | Action: 296 | - s3:CreateBucket 297 | - s3:DeleteBucket 298 | - s3:ListBucket 299 | - s3:PutEncryptionConfiguration 300 | - s3:PutBucketPublicAccessBlock 301 | - s3:PutBucketLogging 302 | - s3:PutBucketAcl 303 | - s3:PutObject 304 | - s3:PutObjectAcl 305 | - s3:PutBucketOwnershipControls 306 | Resource: 307 | - arn:aws:s3:::*pipe* 308 | - arn:aws:s3:::*pipe*/* 309 | 310 | Metadata: 311 | cfn_nag: 312 | rules_to_suppress: 313 | - id: W11 314 | reason: Star required for codecommit:ListRepositories and Glue actions. 
315 | 316 | CodeBuild: 317 | Type: AWS::CodeBuild::Project 318 | Properties: 319 | Name: !Sub ${Project}Setup 320 | Artifacts: 321 | Type: NO_ARTIFACTS 322 | Source: 323 | Type: NO_SOURCE 324 | BuildSpec: !Sub | 325 | version: 0.2 326 | phases: 327 | install: 328 | commands: 329 | - git config --global user.name automated_user 330 | - git config --global user.email automated_email 331 | - git config --global credential.helper '!aws codecommit credential-helper $@' 332 | - git config --global credential.UseHttpPath true 333 | - aws s3 cp s3://$ARTIFACT_BUCKET/$ARTIFACT_KEY_PREFIX/Solution.zip . 334 | - unzip Solution.zip 335 | - ./$SOLUTION_ACTION.sh 336 | Environment: 337 | ComputeType: BUILD_GENERAL1_SMALL 338 | EnvironmentVariables: 339 | - Name: SOLUTION_ACTION 340 | Value: setup 341 | - Name: PROJECT_NAME 342 | Value: !Ref Project 343 | - Name: ARTIFACT_BUCKET 344 | Value: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 345 | - Name: ARTIFACT_KEY_PREFIX 346 | Value: !FindInMap ["SourceCode", "General", "KeyPrefix"] 347 | Image: aws/codebuild/standard:6.0 348 | Type: LINUX_CONTAINER 349 | ServiceRole: !Sub ${CodeBuildRole} 350 | TimeoutInMinutes: 60 351 | Metadata: 352 | cfn_nag: 353 | rules_to_suppress: 354 | - id: W32 355 | reason: Customer can enable encryption if desired. 356 | -------------------------------------------------------------------------------- /deployment/guidance-for-multi-omics-and-multi-modal-data-integration-and-analysis-on-aws.template: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | 3 | Description: | 4 | (SO0077) - This Guidance for Multi-Omics and Multi-Modal Data Integration and Analysis on AWS creates a scalable environment in AWS to prepare genomic, clinical, mutation, expression and imaging data for large-scale analysis and perform interactive queries against a data lake. This solution demonstrates how to 1)Provision Amazon Omics resources to ingest, store and query genomics data, 2) provision serverless data ingestion pipelines for multi-modal data preparation and cataloging, 3) visualize and explore clinical data through an interactive interface, and 4) run interactive analytic queries against a multi-modal data lake. Version v3.0.0. 5 | 6 | Mappings: 7 | Send: 8 | AnonymousUsage: 9 | Data: Yes 10 | SourceCode: 11 | General: 12 | S3Bucket: '%%BUCKET_NAME%%' 13 | KeyPrefix: '%%SOLUTION_NAME%%/%%VERSION%%' 14 | 15 | Parameters: 16 | Project: 17 | Type: String 18 | Description: > 19 | The project name for this solution. The project name will be used to prefix resources created by this solution. The solution Glue database name and Athena workgroup name will be the project name. Project names should be unique to a project. 20 | AllowedPattern: "[a-zA-Z0-9-]{3,24}" 21 | ConstraintDescription: > 22 | Project name should be unique, 3-24 characters in length, and only have alphanumeric characters and hyphens ([a-zA-Z0-9-]{3,32}). 
23 | Default: GenomicsAnalysis 24 | 25 | Resources: 26 | Setup: 27 | Type: Custom::Setup 28 | DependsOn: 29 | - CodeBuild 30 | Version: 1.0 31 | Properties: 32 | ServiceToken: !Sub ${SetupLambda.Arn} 33 | CodeBuildProjectName: !Sub ${CodeBuild} 34 | 35 | SetupLambda: 36 | Type: AWS::Lambda::Function 37 | DependsOn: 38 | - SetupLambdaRole 39 | Properties: 40 | Handler: lambda.handler 41 | Runtime: python3.8 42 | FunctionName: !Sub ${Project}Setup 43 | Code: 44 | S3Bucket: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 45 | S3Key: !Join ["", [!FindInMap ["SourceCode", "General", "KeyPrefix"], "/SolutionSetup.zip"]] 46 | Role: !Sub ${SetupLambdaRole.Arn} 47 | Timeout: 600 48 | Metadata: 49 | cfn_nag: 50 | rules_to_suppress: 51 | - id: W58 52 | reason: Bug in CfnNag. 53 | - id: W89 54 | reason: Lambda only used on setup. 55 | - id: W92 56 | reason: No need for concurrent execution. 57 | SetupLambdaRole: 58 | Type: AWS::IAM::Role 59 | DependsOn: 60 | - CodeBuild 61 | Properties: 62 | AssumeRolePolicyDocument: 63 | Version: 2012-10-17 64 | Statement: 65 | - Action: 66 | - sts:AssumeRole 67 | Effect: Allow 68 | Principal: 69 | Service: 70 | - lambda.amazonaws.com 71 | Path: / 72 | Policies: 73 | - PolicyName: LogsAccess 74 | PolicyDocument: 75 | Statement: 76 | - Effect: Allow 77 | Action: 78 | - logs:CreateLogGroup 79 | - logs:CreateLogStream 80 | - logs:PutLogEvents 81 | Resource: 82 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${Project}* 83 | - PolicyName: CodeBuildAccess 84 | PolicyDocument: 85 | Statement: 86 | - Effect: Allow 87 | Action: 88 | - codebuild:BatchGetProjects 89 | - codebuild:BatchGetBuilds 90 | - codebuild:StartBuild 91 | Resource: 92 | - !Sub ${CodeBuild.Arn} 93 | - PolicyName: EventsAccess 94 | PolicyDocument: 95 | Statement: 96 | - Effect: Allow 97 | Action: 98 | - events:DeleteRule 99 | - events:PutRule 100 | - events:PutTargets 101 | - events:RemoveTargets 102 | Resource: 103 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/Setup* 104 | - PolicyName: LambdaAccess 105 | PolicyDocument: 106 | Statement: 107 | - Effect: Allow 108 | Action: 109 | - lambda:AddPermission 110 | - lambda:RemovePermission 111 | Resource: 112 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 113 | 114 | CodeBuildRole: 115 | Type: AWS::IAM::Role 116 | Properties: 117 | AssumeRolePolicyDocument: 118 | Version: 2012-10-17 119 | Statement: 120 | - Action: 121 | - sts:AssumeRole 122 | Effect: Allow 123 | Principal: 124 | Service: 125 | - codebuild.amazonaws.com 126 | Path: / 127 | Policies: 128 | - PolicyName: CloudFormationAccess 129 | PolicyDocument: 130 | Statement: 131 | - Action: 132 | - cloudformation:CreateStack 133 | - cloudformation:DescribeStacks 134 | - cloudformation:DescribeStackResource 135 | - cloudformation:DescribeStackResources 136 | - cloudformation:UpdateStack 137 | - cloudformation:DeleteStack 138 | - cloudformation:UpdateTerminationProtection 139 | Effect: Allow 140 | Resource: !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${Project}* 141 | - PolicyName: LogsAccess 142 | PolicyDocument: 143 | Statement: 144 | - Effect: Allow 145 | Action: 146 | - logs:CreateLogGroup 147 | - logs:CreateLogStream 148 | - logs:PutLogEvents 149 | Resource: 150 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/${Project}* 151 | - PolicyName: IAMAccess 152 | PolicyDocument: 153 | Statement: 154 | - Effect: Allow 155 | Action: 156 | - 
iam:CreateRole 157 | - iam:DeleteRole 158 | - iam:PutRolePolicy 159 | - iam:DeleteRolePolicy 160 | - iam:AttachRolePolicy 161 | - iam:DetachRolePolicy 162 | - iam:UpdateAssumeRolePolicy 163 | - iam:PassRole 164 | - iam:GetRole 165 | - iam:GetInstanceProfile 166 | - iam:CreateInstanceProfile 167 | - iam:DeleteInstanceProfile 168 | - iam:AddRoleToInstanceProfile 169 | - iam:RemoveRoleFromInstanceProfile 170 | Resource: 171 | - !Sub arn:aws:iam::${AWS::AccountId}:role/${Project}* 172 | - !Sub arn:aws:iam::${AWS::AccountId}:instance-profile/${Project}* 173 | - !Sub arn:aws:iam::${AWS::AccountId}:role/aws-quicksight-service-role-v0 174 | - PolicyName: CodeBuildAccess 175 | PolicyDocument: 176 | Statement: 177 | - Effect: Allow 178 | Action: 179 | - codebuild:CreateProject 180 | - codebuild:UpdateProject 181 | - codebuild:ListProjects 182 | - codebuild:BatchGetProjects 183 | - codebuild:DeleteProject 184 | Resource: 185 | - !Sub arn:aws:codebuild:${AWS::Region}:${AWS::AccountId}:project/${Project}* 186 | - PolicyName: CodePipelineAccess 187 | PolicyDocument: 188 | Statement: 189 | - Effect: Allow 190 | Action: 191 | - codepipeline:CreatePipeline 192 | - codepipeline:GetPipeline 193 | - codepipeline:UpdatePipeline 194 | - codepipeline:DeletePipeline 195 | - codepipeline:GetPipelineState 196 | - codepipeline:ListPipelineExecutions 197 | Resource: 198 | - !Sub arn:aws:codepipeline:${AWS::Region}:${AWS::AccountId}:${Project}* 199 | - PolicyName: CodeCommitAccess 200 | PolicyDocument: 201 | Statement: 202 | - Effect: Allow 203 | Action: 204 | - codecommit:CreateBranch 205 | - codecommit:CreateRepository 206 | - codecommit:GetRepository 207 | - codecommit:DeleteRepository 208 | - codecommit:CreateCommit 209 | - codecommit:GitPush 210 | - codecommit:GitPull 211 | - codecommit:DeleteBranch 212 | Resource: 213 | - !Sub arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:${Project}* 214 | - Effect: Allow 215 | Action: 216 | - codecommit:ListRepositories 217 | Resource: '*' 218 | - PolicyName: EventsAccess 219 | PolicyDocument: 220 | Statement: 221 | - Effect: Allow 222 | Action: 223 | - events:DescribeRule 224 | - events:PutRule 225 | - events:DeleteRule 226 | - events:PutTargets 227 | - events:RemoveTargets 228 | Resource: 229 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/* 230 | - PolicyName: GlueAccess 231 | PolicyDocument: 232 | Statement: 233 | - Effect: Allow 234 | Action: 235 | - glue:StartCrawler 236 | - glue:GetCrawlers 237 | - glue:StartWorkflowRun 238 | Resource: '*' 239 | - PolicyName: LambdaAccess 240 | PolicyDocument: 241 | Statement: 242 | - Effect: Allow 243 | Action: 244 | - lambda:GetFunction 245 | - lambda:CreateFunction 246 | - lambda:DeleteFunction 247 | - lambda:InvokeFunction 248 | Resource: 249 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 250 | - PolicyName: S3Access 251 | PolicyDocument: 252 | Statement: 253 | - Effect: Allow 254 | Action: 255 | - s3:GetObject 256 | Resource: 257 | !Join 258 | - '' 259 | - - 'arn:aws:s3:::' 260 | - !Join 261 | - '-' 262 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 263 | - Ref: "AWS::Region" 264 | - '/*' 265 | - Effect: Allow 266 | Action: 267 | - s3:ListBucket 268 | Resource: 269 | !Join 270 | - '' 271 | - - 'arn:aws:s3:::' 272 | - !Join 273 | - '-' 274 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 275 | - Ref: "AWS::Region" 276 | 277 | - Effect: Allow 278 | Action: 279 | - s3:PutObjectAcl 280 | - s3:GetObject 281 | - s3:PutObject 282 | - s3:DeleteObject 283 | - s3:ListBucket 284 
| - s3:CreateBucket 285 | - s3:DeleteBucket 286 | - s3:PutEncryptionConfiguration 287 | - s3:PutBucketPublicAccessBlock 288 | - s3:PutBucketLogging 289 | - s3:PutBucketAcl 290 | - s3:PutBucketOwnershipControls 291 | Resource: 292 | - arn:aws:s3:::*pipe* 293 | - arn:aws:s3:::*pipe*/* 294 | - Effect: Allow 295 | Action: 296 | - s3:CreateBucket 297 | - s3:DeleteBucket 298 | - s3:ListBucket 299 | - s3:PutEncryptionConfiguration 300 | - s3:PutBucketPublicAccessBlock 301 | - s3:PutBucketLogging 302 | - s3:PutBucketAcl 303 | - s3:PutObject 304 | - s3:PutObjectAcl 305 | - s3:PutBucketOwnershipControls 306 | Resource: 307 | - arn:aws:s3:::*pipe* 308 | - arn:aws:s3:::*pipe*/* 309 | 310 | Metadata: 311 | cfn_nag: 312 | rules_to_suppress: 313 | - id: W11 314 | reason: Star required for codecommit:ListRepositories and Glue actions. 315 | 316 | CodeBuild: 317 | Type: AWS::CodeBuild::Project 318 | Properties: 319 | Name: !Sub ${Project}Setup 320 | Artifacts: 321 | Type: NO_ARTIFACTS 322 | Source: 323 | Type: NO_SOURCE 324 | BuildSpec: !Sub | 325 | version: 0.2 326 | phases: 327 | install: 328 | commands: 329 | - git config --global user.name automated_user 330 | - git config --global user.email automated_email 331 | - git config --global credential.helper '!aws codecommit credential-helper $@' 332 | - git config --global credential.UseHttpPath true 333 | - aws s3 cp s3://$ARTIFACT_BUCKET/$ARTIFACT_KEY_PREFIX/Solution.zip . 334 | - unzip Solution.zip 335 | - ./$SOLUTION_ACTION.sh 336 | Environment: 337 | ComputeType: BUILD_GENERAL1_SMALL 338 | EnvironmentVariables: 339 | - Name: SOLUTION_ACTION 340 | Value: setup 341 | - Name: PROJECT_NAME 342 | Value: !Ref Project 343 | - Name: ARTIFACT_BUCKET 344 | Value: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 345 | - Name: ARTIFACT_KEY_PREFIX 346 | Value: !FindInMap ["SourceCode", "General", "KeyPrefix"] 347 | Image: aws/codebuild/standard:6.0 348 | Type: LINUX_CONTAINER 349 | ServiceRole: !Sub ${CodeBuildRole} 350 | TimeoutInMinutes: 60 351 | Metadata: 352 | cfn_nag: 353 | rules_to_suppress: 354 | - id: W32 355 | reason: Customer can enable encryption if desired. 356 | -------------------------------------------------------------------------------- /deployment/run-unit-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This assumes all of the OS-level configuration has been completed and git repo has already been cloned 4 | # 5 | # This script should be run from the repo's deployment directory 6 | # cd deployment 7 | # ./run-unit-tests.sh 8 | # 9 | 10 | # Get reference for all important folders 11 | template_dir="$PWD" 12 | source_dir="$template_dir/../source" 13 | 14 | echo "------------------------------------------------------------------------------" 15 | echo "[Init] Clean old dist and node_modules folders" 16 | echo "------------------------------------------------------------------------------" 17 | 18 | echo "------------------------------------------------------------------------------" 19 | echo "[Test] Services - Example Function" 20 | echo "------------------------------------------------------------------------------" 21 | -------------------------------------------------------------------------------- /multi-omics.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | } 6 | ], 7 | "settings": {} 8 | } -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/code_cfn.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | 3 | Description: GenomicsAnalysisCode 4 | 5 | Parameters: 6 | ResourcePrefix: 7 | Type: String 8 | Default: GenomicsAnalysis 9 | ResourcePrefixLowercase: 10 | Type: String 11 | Default: genomicsanalysis 12 | ResourcesBucket: 13 | Type: String 14 | DataLakeBucket: 15 | Type: String 16 | DatabaseAdministrator: 17 | Type: String 18 | 19 | Resources: 20 | 21 | JobRole: 22 | Type: AWS::IAM::Role 23 | Properties: 24 | AssumeRolePolicyDocument: 25 | Version: 2012-10-17 26 | Statement: 27 | - Effect: Allow 28 | Principal: 29 | Service: 30 | - glue.amazonaws.com 31 | Action: 32 | - sts:AssumeRole 33 | Path: / 34 | ManagedPolicyArns: 35 | - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole 36 | Policies: 37 | - PolicyName: s3_access 38 | PolicyDocument: 39 | Version: 2012-10-17 40 | Statement: 41 | - Effect: Allow 42 | Action: 43 | - athena:StartQueryExecution 44 | - athena:GetQueryExecution 45 | - athena:GetQueryResults 46 | Resource: 47 | - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}* 48 | - Effect: Allow 49 | Action: 50 | - s3:GetObject 51 | - s3:ListBucket 52 | Resource: 53 | - !Sub arn:aws:s3:::${ResourcesBucket} 54 | - !Sub arn:aws:s3:::${ResourcesBucket}/* 55 | - Effect: Allow 56 | Action: 57 | - s3:PutObject 58 | - s3:GetObject 59 | - s3:ListBucket 60 | - s3:DeleteObject 61 | Resource: 62 | - !Sub arn:aws:s3:::${DataLakeBucket} 63 | - !Sub arn:aws:s3:::${DataLakeBucket}/* 64 | - PolicyName: kms_access 65 | PolicyDocument: 66 | Version: 2012-10-17 67 | Statement: 68 | - Effect: Allow 69 | Action: 70 | - kms:GenerateDataKey 71 | - kms:Decrypt 72 | - kms:Encrypt 73 | Resource: 74 | - !GetAtt DataCatalogEncryptionKey.Arn 75 | 76 | RunbookRole: 77 | Type: AWS::IAM::Role 78 | Properties: 79 | AssumeRolePolicyDocument: 80 | Version: 2012-10-17 81 | Statement: 82 | - Effect: Allow 83 | Principal: 84 | Service: 85 | - sagemaker.amazonaws.com 86 | Action: 87 | - sts:AssumeRole 88 | Path: / 89 | Policies: 90 | - PolicyName: logs_access 91 | PolicyDocument: 92 | Version: 2012-10-17 93 | Statement: 94 | - Effect: Allow 95 | Action: 96 | - logs:CreateLogStream 97 | - logs:DescribeLogStreams 98 | - logs:CreateLogGroup 99 | - logs:PutLogEvents 100 | Resource: 101 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/sagemaker/* 102 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/sagemaker/*:log-stream:aws-glue-* 103 | - PolicyName: s3_access 104 | PolicyDocument: 105 | Version: 2012-10-17 106 | Statement: 107 | - Effect: Allow 108 | Action: 109 | - s3:ListBucket 110 | - s3:GetBucketLocation 111 | Resource: 112 | - !Sub arn:aws:s3:::${DataLakeBucket} 113 | - !Sub arn:aws:s3:::${ResourcesBucket} 114 | - Effect: Allow 115 | Action: 116 | - s3:GetObject 117 | - s3:GetObjectAcl 118 | - s3:PutObject 119 | - s3:DeleteObject 120 | Resource: 121 | - !Sub arn:aws:s3:::${DataLakeBucket}/* 122 | - Effect: Allow 123 | Action: 124 | - s3:GetObject 125 | Resource: 126 | - !Sub arn:aws:s3:::${ResourcesBucket}/* 127 | - PolicyName: glue_access 128 | PolicyDocument: 129 | Version: 2012-10-17 130 | Statement: 131 | - Effect: Allow 132 | Action: 133 | - glue:StartCrawler 134 | - glue:StartJobRun 135 | - glue:StartTrigger 136 | Resource: 137 | - !Sub 
arn:aws:glue:${AWS::Region}:${AWS::AccountId}:crawler/${ResourcePrefixLowercase}* 138 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:job/${ResourcePrefixLowercase}* 139 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:trigger/${ResourcePrefixLowercase}* 140 | - Effect: Allow 141 | Action: 142 | - kms:GenerateDataKey 143 | - kms:Decrypt 144 | - kms:Encrypt 145 | Resource: 146 | - !GetAtt DataCatalogEncryptionKey.Arn 147 | - PolicyName: glue_table_access 148 | PolicyDocument: 149 | Version: 2012-10-17 150 | Statement: 151 | - Effect: Allow 152 | Action: 153 | - glue:GetDatabases 154 | - glue:GetDatabase 155 | - glue:GetTables 156 | - glue:GetTable 157 | - lakeformation:GetDataAccess 158 | Resource: '*' 159 | - Effect: Allow 160 | Action: 161 | - glue:CreateDatabase 162 | Resource: 163 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/default 164 | - Effect: Allow 165 | Action: 166 | - glue:GetTable 167 | - glue:GetTables 168 | - glue:CreateTable 169 | - glue:UpdateTable 170 | - glue:DeleteTable 171 | - glue:GetDatabase 172 | - glue:GetPartition 173 | - glue:GetPartitions 174 | - glue:GetDevEndpoint 175 | - glue:GetDevEndpoints 176 | - glue:UpdateDevEndpoint 177 | Resource: 178 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog 179 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${ResourcePrefixLowercase} 180 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${ResourcePrefixLowercase}/* 181 | - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:devEndpoint/* 182 | - PolicyName: athena_access 183 | PolicyDocument: 184 | Version: 2012-10-17 185 | Statement: 186 | - Effect: Allow 187 | Action: 188 | - athena:StartQueryExecution 189 | - athena:GetQueryExecution 190 | - athena:GetQueryResults 191 | - athena:GetWorkGroup 192 | Resource: 193 | - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/${ResourcePrefixLowercase}-${AWS::Region} 194 | 195 | - PolicyName: cfn_access 196 | PolicyDocument: 197 | Version: 2012-10-17 198 | Statement: 199 | - Effect: Allow 200 | Action: 201 | - cloudformation:DescribeStacks 202 | Resource: 203 | - !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${ResourcePrefix}* 204 | - PolicyName: kms_access 205 | PolicyDocument: 206 | Version: 2012-10-17 207 | Statement: 208 | - Effect: Allow 209 | Action: 210 | - kms:GenerateDataKey 211 | - kms:Decrypt 212 | - kms:Encrypt 213 | Resource: 214 | - !GetAtt DataCatalogEncryptionKey.Arn 215 | 216 | WorkGroup: 217 | Type: AWS::Athena::WorkGroup 218 | Properties: 219 | Description: !Sub ${ResourcePrefixLowercase} 220 | Name: !Sub ${ResourcePrefixLowercase}-${AWS::Region} 221 | RecursiveDeleteOption: True 222 | WorkGroupConfiguration: 223 | EngineVersion: 224 | EffectiveEngineVersion: "Athena engine version 3" 225 | SelectedEngineVersion: "Athena engine version 3" 226 | ResultConfiguration: 227 | OutputLocation: !Sub s3://${DataLakeBucket}/results 228 | 229 | 230 | DataCatalogEncryptionKey: 231 | DeletionPolicy: Retain 232 | Type: AWS::KMS::Key 233 | Properties: 234 | Description: KMS key used to encrypt the Glue data catalog 235 | Enabled: True 236 | EnableKeyRotation: True 237 | KeyPolicy: !Sub | 238 | { 239 | "Version": "2012-10-17", 240 | "Id": "TestGlueCatalogEncryptionKeyPolicy", 241 | "Statement": [ 242 | { 243 | "Sid": "Enable IAM User Permissions", 244 | "Effect": "Allow", 245 | "Principal": { 246 | "AWS": [ 247 | "arn:aws:iam::${AWS::AccountId}:root", 248 | "${DatabaseAdministrator}" 249 | ] 250 | }, 251 | "Action": "kms:*", 252 
| "Resource": "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/*" 253 | }, 254 | { 255 | "Sid": "Allow access for Key Administrators", 256 | "Effect": "Allow", 257 | "Principal": { 258 | "AWS": [ 259 | "arn:aws:iam::${AWS::AccountId}:root", 260 | "${DatabaseAdministrator}" 261 | ] 262 | }, 263 | "Action": [ 264 | "kms:Create*", 265 | "kms:Describe*", 266 | "kms:Enable*", 267 | "kms:List*", 268 | "kms:Put*", 269 | "kms:Update*", 270 | "kms:Revoke*", 271 | "kms:Disable*", 272 | "kms:Get*", 273 | "kms:Delete*", 274 | "kms:TagResource", 275 | "kms:UntagResource", 276 | "kms:ScheduleKeyDeletion", 277 | "kms:CancelKeyDeletion" 278 | ], 279 | "Resource": "*" 280 | }, 281 | { 282 | "Sid": "Allow use of the key", 283 | "Effect": "Allow", 284 | "Principal": { 285 | "Service": "logs.${AWS::Region}.amazonaws.com" 286 | }, 287 | "Action": [ 288 | "kms:Encrypt", 289 | "kms:Decrypt", 290 | "kms:ReEncrypt*", 291 | "kms:GenerateDataKey*", 292 | "kms:DescribeKey" 293 | ], 294 | "Resource": "*" 295 | }, 296 | { 297 | "Sid": "Allow use of the key", 298 | "Effect": "Allow", 299 | "Principal": "*", 300 | "Action": [ 301 | "kms:Encrypt", 302 | "kms:Decrypt", 303 | "kms:ReEncrypt*", 304 | "kms:GenerateDataKey*", 305 | "kms:DescribeKey" 306 | ], 307 | "Resource": "*", 308 | "Condition": { 309 | "ArnEquals": { 310 | "aws:PrincipalARN": "arn:aws:iam::${AWS::AccountId}:role/${ResourcePrefix}*" 311 | } 312 | } 313 | } 314 | ] 315 | } 316 | 317 | DataCatalogEncryptionSettings: 318 | Type: AWS::Glue::DataCatalogEncryptionSettings 319 | DependsOn: DataCatalogEncryptionKey 320 | Properties: 321 | CatalogId: !Ref AWS::AccountId 322 | DataCatalogEncryptionSettings: 323 | EncryptionAtRest: 324 | CatalogEncryptionMode: SSE-KMS 325 | SseAwsKmsKeyId: !Ref DataCatalogEncryptionKey 326 | 327 | SecurityConfiguration: 328 | Type: AWS::Glue::SecurityConfiguration 329 | Properties: 330 | EncryptionConfiguration: 331 | CloudWatchEncryption: 332 | CloudWatchEncryptionMode: SSE-KMS 333 | KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn 334 | JobBookmarksEncryption: 335 | JobBookmarksEncryptionMode: CSE-KMS 336 | KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn 337 | S3Encryptions: 338 | - S3EncryptionMode: SSE-KMS 339 | KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn 340 | Name: !Sub ${ResourcePrefix}SecurityConfiguration 341 | 342 | DataCatalog: 343 | Type: AWS::Glue::Database 344 | DependsOn: DataCatalogEncryptionKey 345 | Properties: 346 | CatalogId: !Ref AWS::AccountId 347 | DatabaseInput: 348 | Name: !Sub ${ResourcePrefixLowercase} 349 | Description: Data catalog for Human NGS Tertiary Analysis and Data Lakes solution 350 | 351 | RunbookLifecycle: 352 | Type: AWS::SageMaker::NotebookInstanceLifecycleConfig 353 | Properties: 354 | NotebookInstanceLifecycleConfigName: !Sub ${ResourcePrefixLowercase}Runbook 355 | OnStart: 356 | - Content: !Base64 357 | Fn::Sub: | 358 | #!/bin/bash 359 | cd /home/ec2-user/SageMaker 360 | set -e 361 | aws s3 sync s3://${ResourcesBucket}/notebooks . 
362 | chmod 666 *.ipynb 363 | echo "export RESOURCE_PREFIX='${ResourcePrefix}'" > /home/ec2-user/anaconda3/envs/python3/etc/conda/activate.d/env_vars.sh 364 | 365 | Runbook: 366 | Type: AWS::SageMaker::NotebookInstance 367 | Properties: 368 | NotebookInstanceName: !Sub ${ResourcePrefixLowercase}Runbook 369 | InstanceType: ml.t2.medium 370 | LifecycleConfigName: !GetAtt RunbookLifecycle.NotebookInstanceLifecycleConfigName 371 | RoleArn: !GetAtt RunbookRole.Arn 372 | PlatformIdentifier: notebook-al2-v2 373 | 374 | 375 | Outputs: 376 | DataCatalogEncryptionKeyArn: 377 | Value: !GetAtt DataCatalogEncryptionKey.Arn 378 | Export: 379 | Name: !Sub "${ResourcePrefix}-DataCatalogEncryptionKeyArn" 380 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/copyresources_buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | install: 4 | runtime-versions: 5 | python: 3.8 6 | commands: 7 | - wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | apt-key add - 8 | - apt-get update -y 9 | build: 10 | commands: 11 | - aws s3 sync ./resources s3://${RESOURCES_BUCKET} --size-only 12 | artifacts: 13 | files: 14 | - quicksight_cfn.yml 15 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/omics_cfn.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | 3 | Description: Omics Resources 4 | Parameters: 5 | OmicsResourcePrefix: 6 | Type: String 7 | Default: omics-cfn 8 | OmicsResourcesS3Bucket: 9 | Type: String 10 | OmicsDataS3Bucket: 11 | Type: String 12 | OmicsReferenceS3Key: 13 | Type: String 14 | Default: references/hg38/Homo_sapiens_assembly38.fasta 15 | OmicsReferenceName: 16 | Type: String 17 | Default: hg38 18 | ClinvarS3Key: 19 | Type: String 20 | Default: annotation/vcf/clinvar/clinvar.vcf.gz 21 | SampleVcfS3Key: 22 | Type: String 23 | Default: variants/vcf/variants.vcf.gz 24 | OneKgVcfS3Key: 25 | Type: String 26 | Default: variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz 27 | OmicsAnnotationStoreName: 28 | Type: String 29 | Default: omicsannotationstore 30 | OmicsVariantStoreName: 31 | Type: String 32 | Default: omicsvariantstore 33 | AnnotationStoreFormat: 34 | Type: String 35 | Default: 'VCF' 36 | 37 | Resources: 38 | 39 | # OMICS CREATE REFERENCE STORE 40 | OmicsReferenceStore: 41 | Type: Custom::OmicsReferenceStore 42 | DependsOn: 43 | - OmicsReferenceStoreLambda 44 | Version: 1.0 45 | Properties: 46 | ServiceToken: !Sub ${OmicsReferenceStoreLambda.Arn} 47 | ReferenceStoreName: !Sub ${OmicsResourcePrefix}-reference-store 48 | 49 | OmicsReferenceStoreLambda: 50 | Type: AWS::Lambda::Function 51 | DependsOn: 52 | - OmicsReferenceStoreLambdaRole 53 | Properties: 54 | Handler: create_reference_store_lambda.handler 55 | Runtime: python3.9 56 | FunctionName: !Sub ${OmicsResourcePrefix}-create-reference-store 57 | Code: 58 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 59 | S3Key: artifacts/create_reference_store_lambda.zip 60 | Role: !Sub ${OmicsReferenceStoreLambdaRole.Arn} 61 | Timeout: 60 62 | 63 | OmicsReferenceStoreLambdaRole: 64 | Type: AWS::IAM::Role 65 | Properties: 66 | AssumeRolePolicyDocument: 67 | Version: 2012-10-17 68 | Statement: 69 | - Action: 70 | - sts:AssumeRole 71 | Effect: Allow 72 | Principal: 73 | Service: 74 | - lambda.amazonaws.com 75 | Path: / 76 | Policies: 77 | - 
PolicyName: CreateReferenceStorePolicy 78 | PolicyDocument: 79 | Statement: 80 | - Effect: Allow 81 | Action: 82 | - logs:CreateLogGroup 83 | - logs:CreateLogStream 84 | - logs:PutLogEvents 85 | Resource: 86 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 87 | - Effect: Allow 88 | Action: 89 | - omics:* 90 | Resource: '*' 91 | - Effect: Allow 92 | Action: 93 | - lambda:AddPermission 94 | - lambda:RemovePermission 95 | - events:PutRule 96 | - events:DeleteRule 97 | - events:PutTargets 98 | - events:RemoveTargets 99 | Resource: '*' 100 | 101 | # OMICS IMPORT REFERENCE 102 | OmicsImportReference: 103 | Type: Custom::OmicsImportReference 104 | DependsOn: 105 | - OmicsReferenceStore 106 | - OmicsImportReferenceLambda 107 | Version: 1.0 108 | Properties: 109 | ServiceToken: !Sub ${OmicsImportReferenceLambda.Arn} 110 | ReferenceStoreId: !Sub ${OmicsReferenceStore.ReferenceStoreId} 111 | ReferenceName: !Sub ${OmicsReferenceName} 112 | OmicsImportReferenceRoleArn: !Sub ${OmicsImportReferenceJobRole.Arn} 113 | ReferenceSourceS3Uri: !Sub s3://${OmicsDataS3Bucket}/${OmicsReferenceS3Key} 114 | 115 | OmicsImportReferenceLambda: 116 | Type: AWS::Lambda::Function 117 | DependsOn: 118 | - OmicsImportReferenceLambdaRole 119 | - OmicsImportReferenceJobRole 120 | Properties: 121 | Handler: import_reference_lambda.handler 122 | Runtime: python3.9 123 | FunctionName: !Sub ${OmicsResourcePrefix}-import-reference 124 | Code: 125 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 126 | S3Key: artifacts/import_reference_lambda.zip 127 | Role: !Sub ${OmicsImportReferenceLambdaRole.Arn} 128 | Timeout: 60 129 | 130 | OmicsImportReferenceLambdaRole: 131 | Type: AWS::IAM::Role 132 | DependsOn: 133 | - OmicsImportReferenceJobRole 134 | Properties: 135 | AssumeRolePolicyDocument: 136 | Version: 2012-10-17 137 | Statement: 138 | - Action: 139 | - sts:AssumeRole 140 | Effect: Allow 141 | Principal: 142 | Service: 143 | - lambda.amazonaws.com 144 | Path: / 145 | Policies: 146 | - PolicyName: ImportReferencePolicy 147 | PolicyDocument: 148 | Statement: 149 | - Effect: Allow 150 | Action: 151 | - logs:CreateLogGroup 152 | - logs:CreateLogStream 153 | - logs:PutLogEvents 154 | Resource: 155 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 156 | - Effect: Allow 157 | Action: 158 | - omics:* 159 | Resource: '*' 160 | - Effect: Allow 161 | Action: 162 | - lambda:AddPermission 163 | - lambda:RemovePermission 164 | - events:PutRule 165 | - events:DeleteRule 166 | - events:PutTargets 167 | - events:RemoveTargets 168 | Resource: '*' 169 | - Effect: Allow 170 | Action: 171 | - iam:GetRole 172 | - iam:PassRole 173 | Resource: !Sub ${OmicsImportReferenceJobRole.Arn} 174 | 175 | OmicsImportReferenceJobRole: 176 | Type: AWS::IAM::Role 177 | Properties: 178 | AssumeRolePolicyDocument: 179 | Version: 2012-10-17 180 | Statement: 181 | - Action: 182 | - sts:AssumeRole 183 | Effect: Allow 184 | Principal: 185 | Service: 186 | - omics.amazonaws.com 187 | Path: / 188 | Policies: 189 | - PolicyName: ImportReferenceJobRolePolicy 190 | PolicyDocument: 191 | Statement: 192 | - Effect: Allow 193 | Action: 194 | - s3:GetObject 195 | - s3:GetBucketLocation 196 | - s3:ListBucket 197 | Resource: 198 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket} 199 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket}/${OmicsReferenceS3Key} 200 | 201 | # OMICS CREATE VARIANT STORE 202 | OmicsVariantStore: 203 | Type: Custom::OmicsVariantStore 204 | DependsOn: 205 | - OmicsVariantStoreLambda 206 | - OmicsImportReference 207 | Version: 
1.0 208 | Properties: 209 | ServiceToken: !Sub ${OmicsVariantStoreLambda.Arn} 210 | VariantStoreName: !Sub ${OmicsVariantStoreName} 211 | ReferenceArn: !Sub ${OmicsImportReference.Arn} 212 | 213 | OmicsVariantStoreLambda: 214 | Type: AWS::Lambda::Function 215 | DependsOn: 216 | - OmicsVariantStoreLambdaRole 217 | Properties: 218 | Handler: create_variant_store_lambda.handler 219 | Runtime: python3.9 220 | FunctionName: !Sub ${OmicsResourcePrefix}-create-variant-store 221 | Code: 222 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 223 | S3Key: artifacts/create_variant_store_lambda.zip 224 | Role: !Sub ${OmicsVariantStoreLambdaRole.Arn} 225 | Timeout: 60 226 | 227 | OmicsVariantStoreLambdaRole: 228 | Type: AWS::IAM::Role 229 | Properties: 230 | AssumeRolePolicyDocument: 231 | Version: 2012-10-17 232 | Statement: 233 | - Action: 234 | - sts:AssumeRole 235 | Effect: Allow 236 | Principal: 237 | Service: 238 | - lambda.amazonaws.com 239 | Path: / 240 | Policies: 241 | - PolicyName: CreateVariantStorePolicy 242 | PolicyDocument: 243 | Statement: 244 | - Effect: Allow 245 | Action: 246 | - logs:CreateLogGroup 247 | - logs:CreateLogStream 248 | - logs:PutLogEvents 249 | Resource: 250 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 251 | - Effect: Allow 252 | Action: 253 | - omics:* 254 | Resource: '*' 255 | - Effect: Allow 256 | Action: 257 | - lambda:AddPermission 258 | - lambda:RemovePermission 259 | - events:PutRule 260 | - events:DeleteRule 261 | - events:PutTargets 262 | - events:RemoveTargets 263 | Resource: '*' 264 | - Effect: Allow 265 | Action: 266 | - ram:GetResourceShareInvitations 267 | - ram:AcceptResourceShareInvitation 268 | Resource: '*' 269 | 270 | # OMICS IMPORT VARIANT (sample and 1kg) JOB 271 | OmicsImportVariantOneKg: 272 | Type: Custom::OmicsImportVariantOneKg 273 | DependsOn: 274 | - OmicsVariantStore 275 | - OmicsImportVariantLambda 276 | Version: 1.0 277 | Properties: 278 | ServiceToken: !Sub ${OmicsImportVariantLambda.Arn} 279 | VariantStoreName: !Sub ${OmicsVariantStoreName} 280 | OmicsImportVariantRoleArn: !Sub ${OmicsImportVariantJobRole.Arn} 281 | VcfS3Uri: !Sub s3://${OmicsDataS3Bucket}/${OneKgVcfS3Key} 282 | 283 | OmicsImportVariantSampleVcf: 284 | Type: Custom::OmicsImportVariantSampleVcf 285 | DependsOn: 286 | - OmicsVariantStore 287 | - OmicsImportVariantLambda 288 | Version: 1.0 289 | Properties: 290 | ServiceToken: !Sub ${OmicsImportVariantLambda.Arn} 291 | VariantStoreName: !Sub ${OmicsVariantStoreName} 292 | OmicsImportVariantRoleArn: !Sub ${OmicsImportVariantJobRole.Arn} 293 | VcfS3Uri: !Sub s3://${OmicsDataS3Bucket}/${SampleVcfS3Key} 294 | 295 | OmicsImportVariantLambda: 296 | Type: AWS::Lambda::Function 297 | DependsOn: 298 | - OmicsImportVariantLambdaRole 299 | Properties: 300 | Handler: import_variant_lambda.handler 301 | Runtime: python3.9 302 | FunctionName: !Sub ${OmicsResourcePrefix}-import-variant 303 | Code: 304 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 305 | S3Key: artifacts/import_variant_lambda.zip 306 | Role: !Sub ${OmicsImportVariantLambdaRole.Arn} 307 | Timeout: 60 308 | 309 | OmicsImportVariantLambdaRole: 310 | Type: AWS::IAM::Role 311 | DependsOn: 312 | - OmicsImportVariantJobRole 313 | Properties: 314 | AssumeRolePolicyDocument: 315 | Version: 2012-10-17 316 | Statement: 317 | - Action: 318 | - sts:AssumeRole 319 | Effect: Allow 320 | Principal: 321 | Service: 322 | - lambda.amazonaws.com 323 | Path: / 324 | Policies: 325 | - PolicyName: ImportVariantPolicy 326 | PolicyDocument: 327 | Statement: 328 | - Effect: 
Allow 329 | Action: 330 | - logs:CreateLogGroup 331 | - logs:CreateLogStream 332 | - logs:PutLogEvents 333 | Resource: 334 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 335 | - Effect: Allow 336 | Action: 337 | - omics:* 338 | Resource: '*' 339 | - Effect: Allow 340 | Action: 341 | - lambda:AddPermission 342 | - lambda:RemovePermission 343 | - events:PutRule 344 | - events:DeleteRule 345 | - events:PutTargets 346 | - events:RemoveTargets 347 | Resource: '*' 348 | - Effect: Allow 349 | Action: 350 | - iam:GetRole 351 | - iam:PassRole 352 | Resource: !Sub ${OmicsImportVariantJobRole.Arn} 353 | 354 | OmicsImportVariantJobRole: 355 | Type: AWS::IAM::Role 356 | Properties: 357 | AssumeRolePolicyDocument: 358 | Version: 2012-10-17 359 | Statement: 360 | - Action: 361 | - sts:AssumeRole 362 | Effect: Allow 363 | Principal: 364 | Service: 365 | - omics.amazonaws.com 366 | Path: / 367 | Policies: 368 | - PolicyName: OmicsImportVariantJobRolePolicy 369 | PolicyDocument: 370 | Statement: 371 | - Effect: Allow 372 | Action: 373 | - s3:GetObject 374 | - s3:GetBucketLocation 375 | - s3:ListBucket 376 | Resource: 377 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket} 378 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket}/${SampleVcfS3Key} 379 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket}/${OneKgVcfS3Key} 380 | - Effect: Allow 381 | Action: 382 | - omics:ListReferences 383 | - omics:GetReference 384 | - omics:GetReferenceMetadata 385 | Resource: '*' 386 | 387 | # OMICS CREATE ANNOTATION STORE 388 | OmicsAnnotationStore: 389 | Type: Custom::OmicsAnnotationStore 390 | DependsOn: 391 | - OmicsAnnotationStoreLambda 392 | - OmicsImportReference 393 | Version: 1.0 394 | Properties: 395 | ServiceToken: !Sub ${OmicsAnnotationStoreLambda.Arn} 396 | AnnotationStoreName: !Sub ${OmicsAnnotationStoreName} 397 | ReferenceArn: !Sub ${OmicsImportReference.Arn} 398 | AnnotationStoreFormat: !Sub ${AnnotationStoreFormat} 399 | 400 | OmicsAnnotationStoreLambda: 401 | Type: AWS::Lambda::Function 402 | DependsOn: 403 | - OmicsAnnotationStoreLambdaRole 404 | Properties: 405 | Handler: create_annotation_store_lambda.handler 406 | Runtime: python3.9 407 | FunctionName: !Sub ${OmicsResourcePrefix}-create-annotation-store-v2 408 | Code: 409 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 410 | S3Key: artifacts/create_annotation_store_lambda.zip 411 | Role: !Sub ${OmicsAnnotationStoreLambdaRole.Arn} 412 | Timeout: 60 413 | 414 | OmicsAnnotationStoreLambdaRole: 415 | Type: AWS::IAM::Role 416 | Properties: 417 | AssumeRolePolicyDocument: 418 | Version: 2012-10-17 419 | Statement: 420 | - Action: 421 | - sts:AssumeRole 422 | Effect: Allow 423 | Principal: 424 | Service: 425 | - lambda.amazonaws.com 426 | Path: / 427 | Policies: 428 | - PolicyName: CreateAnnotationPolicy 429 | PolicyDocument: 430 | Statement: 431 | - Effect: Allow 432 | Action: 433 | - logs:CreateLogGroup 434 | - logs:CreateLogStream 435 | - logs:PutLogEvents 436 | Resource: 437 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 438 | - Effect: Allow 439 | Action: 440 | - omics:* 441 | Resource: '*' 442 | - Effect: Allow 443 | Action: 444 | - lambda:AddPermission 445 | - lambda:RemovePermission 446 | - events:PutRule 447 | - events:DeleteRule 448 | - events:PutTargets 449 | - events:RemoveTargets 450 | Resource: '*' 451 | - Effect: Allow 452 | Action: 453 | - ram:GetResourceShareInvitations 454 | - ram:AcceptResourceShareInvitation 455 | Resource: '*' 456 | 457 | # OMICS CREATE IMPORT ANNOTATION JOB 458 | OmicsImportAnnotation: 459 | 
Type: Custom::OmicsImportAnnotation 460 | DependsOn: 461 | - OmicsAnnotationStore 462 | - OmicsImportAnnotationLambda 463 | Version: 1.0 464 | Properties: 465 | ServiceToken: !Sub ${OmicsImportAnnotationLambda.Arn} 466 | AnnotationStoreName: !Sub ${OmicsAnnotationStoreName} 467 | OmicsImportAnnotationRoleArn: !Sub ${OmicsImportAnnotationJobRole.Arn} 468 | AnnotationSourceS3Uri: !Sub s3://${OmicsDataS3Bucket}/${ClinvarS3Key} 469 | 470 | OmicsImportAnnotationLambda: 471 | Type: AWS::Lambda::Function 472 | DependsOn: 473 | - OmicsImportAnnotationLambdaRole 474 | Properties: 475 | Handler: import_annotation_lambda.handler 476 | Runtime: python3.9 477 | FunctionName: !Sub ${OmicsResourcePrefix}-import-annotation 478 | Code: 479 | S3Bucket: !Sub ${OmicsResourcesS3Bucket} 480 | S3Key: artifacts/import_annotation_lambda.zip 481 | Role: !Sub ${OmicsImportAnnotationLambdaRole.Arn} 482 | Timeout: 60 483 | 484 | OmicsImportAnnotationLambdaRole: 485 | Type: AWS::IAM::Role 486 | DependsOn: 487 | - OmicsImportAnnotationJobRole 488 | Properties: 489 | AssumeRolePolicyDocument: 490 | Version: 2012-10-17 491 | Statement: 492 | - Action: 493 | - sts:AssumeRole 494 | Effect: Allow 495 | Principal: 496 | Service: 497 | - lambda.amazonaws.com 498 | Path: / 499 | Policies: 500 | - PolicyName: ImportAnnotationPolicy 501 | PolicyDocument: 502 | Statement: 503 | - Effect: Allow 504 | Action: 505 | - logs:CreateLogGroup 506 | - logs:CreateLogStream 507 | - logs:PutLogEvents 508 | Resource: 509 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* 510 | - Effect: Allow 511 | Action: 512 | - omics:* 513 | Resource: '*' 514 | - Effect: Allow 515 | Action: 516 | - lambda:AddPermission 517 | - lambda:RemovePermission 518 | - events:PutRule 519 | - events:DeleteRule 520 | - events:PutTargets 521 | - events:RemoveTargets 522 | Resource: '*' 523 | - Effect: Allow 524 | Action: 525 | - iam:GetRole 526 | - iam:PassRole 527 | Resource: !Sub ${OmicsImportAnnotationJobRole.Arn} 528 | 529 | OmicsImportAnnotationJobRole: 530 | Type: AWS::IAM::Role 531 | Properties: 532 | AssumeRolePolicyDocument: 533 | Version: 2012-10-17 534 | Statement: 535 | - Action: 536 | - sts:AssumeRole 537 | Effect: Allow 538 | Principal: 539 | Service: 540 | - omics.amazonaws.com 541 | Path: / 542 | Policies: 543 | - PolicyName: ImportAnnotationJobRolePolicy 544 | PolicyDocument: 545 | Statement: 546 | - Effect: Allow 547 | Action: 548 | - s3:GetObject 549 | - s3:GetBucketLocation 550 | - s3:ListBucket 551 | Resource: 552 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket} 553 | - !Sub arn:aws:s3:::${OmicsDataS3Bucket}/${ClinvarS3Key} 554 | - Effect: Allow 555 | Action: 556 | - omics:ListReferences 557 | - omics:GetReference 558 | - omics:GetReferenceMetadata 559 | Resource: '*' 560 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/omicsresources_buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | install: 4 | runtime-versions: 5 | python: 3.9 6 | build: 7 | commands: 8 | - | 9 | # Declare all lambda functions 10 | declare -a LambdaNames=("create_reference_store_lambda" "import_reference_lambda" "create_variant_store_lambda" "import_variant_lambda" "create_annotation_store_lambda" "import_annotation_lambda") 11 | 12 | # iterate over each lambda 13 | for lambda in ${LambdaNames[@]}; do 14 | 15 | COUNT=$(aws s3 ls s3://${RESOURCES_BUCKET}/omics/"$lambda".py | wc -l) 16 | if [ $COUNT = 0 ]; then 17 | echo 
"skipping Build, ${lambda}.py not found in s3://${RESOURCES_BUCKET}/omics/" 18 | else 19 | echo "Building lambda zip for: ${lambda} " 20 | mkdir tmp_${lambda} 21 | cd tmp_${lambda} 22 | echo "Download lambda py for: ${lambda} " 23 | aws s3 cp s3://${RESOURCES_BUCKET}/omics/"$lambda".py . 24 | echo "Installing pip packages" 25 | pip3 install crhelper boto3==1.26.16 -t ./package 26 | cd ./package 27 | zip -r ../${lambda}.zip . 28 | cd .. 29 | echo "Zip lambda to artifact" 30 | zip -g ${lambda}.zip ${lambda}.py 31 | echo "Upload zip to s3://${RESOURCES_BUCKET}/artifacts/" 32 | aws s3 cp ${lambda}.zip s3://${RESOURCES_BUCKET}/artifacts/ 33 | cd .. 34 | rm -rf tmp_${lambda} 35 | echo "Done with ${lambda}" 36 | fi 37 | done -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/create_annotation_store_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | 15 | from crhelper import CfnResource 16 | import logging 17 | import boto3 18 | from botocore.exceptions import ClientError 19 | 20 | logger = logging.getLogger(__name__) 21 | # Initialise the helper, all inputs are optional, this example shows the defaults 22 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 23 | 24 | # Initiate client 25 | try: 26 | print("Attempt to initiate client") 27 | omics_session = boto3.Session() 28 | omics_client = omics_session.client('omics') 29 | print("Attempt to initiate client complete") 30 | except Exception as e: 31 | helper.init_failure(e) 32 | 33 | 34 | @helper.create 35 | def create(event, context): 36 | logger.info("Got Create") 37 | create_omics_annotation_store(event, context) 38 | 39 | 40 | @helper.update 41 | def update(event, context): 42 | logger.info("Got Update") 43 | create_omics_annotation_store(event, context) 44 | 45 | 46 | @helper.delete 47 | def delete(event, context): 48 | logger.info("Got Delete - attempt to delete") 49 | delete_omics_annotation_store(event, context) 50 | 51 | @helper.poll_create 52 | def poll_create(event, context): 53 | logger.info("Got Create poll") 54 | return check_annotation_store_status(event, context) 55 | 56 | 57 | @helper.poll_update 58 | def poll_update(event, context): 59 | logger.info("Got Update poll") 60 | return check_annotation_store_status(event, context) 61 | 62 | 63 | @helper.poll_delete 64 | def poll_delete(event, context): 65 | logger.info("Got Delete poll") 66 | return "got delete" 67 | 68 | def handler(event, context): 69 | helper(event, context) 70 | 71 | def create_omics_annotation_store(event, context): 72 | 
annotation_store_name = event['ResourceProperties']['AnnotationStoreName'] 73 | reference_arn = event['ResourceProperties']['ReferenceArn'] 74 | store_format = event['ResourceProperties']['AnnotationStoreFormat'] 75 | try: 76 | print(f"Attempt to create annotation store: {annotation_store_name}") 77 | response = omics_client.create_annotation_store( 78 | name=annotation_store_name, 79 | reference={"referenceArn": reference_arn}, 80 | storeFormat=store_format 81 | ) 82 | except ClientError as e: 83 | raise Exception( "boto3 client error : " + e.__str__()) 84 | except Exception as e: 85 | raise Exception( "Unexpected error : " + e.__str__()) 86 | logger.info(response) 87 | helper.Data.update({"AnnotationStoreId": response['id']}) 88 | return True 89 | 90 | def delete_omics_annotation_store(event, context): 91 | 92 | annotation_store_name = event['ResourceProperties']['AnnotationStoreName'] 93 | try: 94 | print("Attempt to delete annotation store") 95 | response = omics_client.delete_annotation_store( 96 | name=annotation_store_name 97 | ) 98 | except ClientError as e: 99 | raise Exception( "boto3 client error : " + e.__str__()) 100 | except Exception as e: 101 | raise Exception( "Unexpected error : " + e.__str__()) 102 | logger.info(response) 103 | return helper.Data.get("AnnotationStoreId") 104 | 105 | def check_annotation_store_status(event, context): 106 | annotation_store_name = event['ResourceProperties']['AnnotationStoreName'] 107 | 108 | try: 109 | response = omics_client.get_annotation_store(name=annotation_store_name) 110 | except ClientError as e: 111 | raise Exception( "boto3 client error : " + e.__str__()) 112 | except Exception as e: 113 | raise Exception( "Unexpected error : " + e.__str__()) 114 | status = response['status'] 115 | 116 | if status in ['CREATING', 'UPDATING', 'IN_PROGRESS']: 117 | logger.info(status) 118 | return None 119 | else: 120 | if status in ['READY', 'COMPLETED', 'ACTIVE', 'CREATED', 'COMPLETE']: 121 | logger.info(status) 122 | return True 123 | else: 124 | msg = f"Variant store; {annotation_store_name} has status {status}, exiting" 125 | logger.info(msg) 126 | raise ValueError(msg) 127 | 128 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/create_reference_store_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. 
* 12 | # *********************************************************************************************************************/ 13 | 14 | 15 | from crhelper import CfnResource 16 | import logging 17 | import boto3 18 | from botocore.exceptions import ClientError 19 | 20 | logger = logging.getLogger(__name__) 21 | # Initialise the helper, all inputs are optional, this example shows the defaults 22 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 23 | 24 | 25 | # Initiate client 26 | try: 27 | print("Attempt to initiate client") 28 | omics_session = boto3.Session() 29 | omics_client = omics_session.client('omics') 30 | print("Attempt to initiate client complete") 31 | except Exception as e: 32 | helper.init_failure(e) 33 | 34 | 35 | @helper.create 36 | def create(event, context): 37 | logger.info("Got Create") 38 | create_omics_reference_store(event, context) 39 | 40 | 41 | @helper.update 42 | def update(event, context): 43 | logger.info("Got Update") 44 | create_omics_reference_store(event, context) 45 | 46 | 47 | @helper.delete 48 | def delete(event, context): 49 | logger.info("Got Delete - attempting to delete") 50 | delete_omics_reference_store(event, context) 51 | 52 | @helper.poll_create 53 | def poll_create(event, context): 54 | logger.info("Got Create poll") 55 | return True 56 | 57 | 58 | @helper.poll_update 59 | def poll_update(event, context): 60 | logger.info("Got Update poll") 61 | return True 62 | 63 | @helper.poll_delete 64 | def poll_delete(event, context): 65 | logger.info("Got Delete poll") 66 | return True 67 | 68 | def handler(event, context): 69 | helper(event, context) 70 | 71 | def create_omics_reference_store(event, context): 72 | reference_store_name = event['ResourceProperties']['ReferenceStoreName'] 73 | try: 74 | print(f"Attempt to create reference store: {reference_store_name}") 75 | response = omics_client.create_reference_store( 76 | name=reference_store_name 77 | ) 78 | except ClientError as e: 79 | raise Exception( "boto3 client error : " + e.__str__()) 80 | except Exception as e: 81 | raise Exception( "Unexpected error : " + e.__str__()) 82 | logger.info(response) 83 | helper.Data.update({"ReferenceStoreArn": response['arn']}) 84 | helper.Data.update({"ReferenceStoreId": response['id']}) 85 | return True 86 | 87 | def delete_omics_reference_store(event, context): 88 | reference_store_name = event['ResourceProperties']['ReferenceStoreName'] 89 | # list reference store and filter by name 90 | try: 91 | reference_stores = omics_client.list_reference_stores(filter={ 92 | "name": reference_store_name 93 | }) 94 | except ClientError as e: 95 | raise Exception( "boto3 client error : " + e.__str__()) 96 | except Exception as e: 97 | raise Exception( "Unexpected error : " + e.__str__()) 98 | if reference_stores is None: 99 | return "No reference stores found" 100 | else: 101 | if "referenceStores" in reference_stores and reference_stores["referenceStores"] == 0: 102 | return "No reference stores found" 103 | else: 104 | reference_store_id = reference_stores["referenceStores"][0]['id'] 105 | # get references 106 | try: 107 | references = omics_client.list_references(referenceStoreId=reference_store_id) 108 | except ClientError as e: 109 | raise Exception( "boto3 client error : " + e.__str__()) 110 | except Exception as e: 111 | raise Exception( "Unexpected error : " + e.__str__()) 112 | 113 | # delete all references 114 | ids = [] 115 | if "references" not in references: 116 | print("No references found in reference store") 117 | 
else: 118 | for i in references["references"]: 119 | print(i) 120 | ids.append(i['id']) 121 | for _id in ids: 122 | try: 123 | print(f"deleting reference with id: {_id}") 124 | response = omics_client.delete_reference(id=_id, referenceStoreId=reference_store_id) 125 | except ClientError as e: 126 | raise Exception( "boto3 client error : " + e.__str__()) 127 | except Exception as e: 128 | raise Exception( "Unexpected error : " + e.__str__()) 129 | logger.info(response) 130 | 131 | # delete reference store 132 | try: 133 | print(f"Attempt to delete reference store: {reference_store_name}") 134 | response = omics_client.delete_reference_store( 135 | id=reference_store_id 136 | ) 137 | except ClientError as e: 138 | raise Exception( "boto3 client error : " + e.__str__()) 139 | except Exception as e: 140 | raise Exception( "Unexpected error : " + e.__str__()) 141 | logger.info(response) 142 | return reference_store_id 143 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/create_variant_store_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. 
* 12 | # *********************************************************************************************************************/ 13 | 14 | from crhelper import CfnResource 15 | import logging 16 | import boto3 17 | from botocore.exceptions import ClientError 18 | 19 | logger = logging.getLogger(__name__) 20 | # Initialise the helper, all inputs are optional, this example shows the defaults 21 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 22 | 23 | # Initiate client 24 | try: 25 | print("Attempt to initiate client") 26 | omics_session = boto3.Session() 27 | omics_client = omics_session.client('omics') 28 | print("Attempt to initiate client complete") 29 | except Exception as e: 30 | helper.init_failure(e) 31 | 32 | 33 | @helper.create 34 | def create(event, context): 35 | logger.info("Got Create") 36 | create_omics_variant_store(event, context) 37 | 38 | 39 | @helper.update 40 | def update(event, context): 41 | logger.info("Got Update") 42 | create_omics_variant_store(event, context) 43 | 44 | 45 | @helper.delete 46 | def delete(event, context): 47 | logger.info("Got Delete - attempting to delete") 48 | delete_omics_variant_store(event, context) 49 | 50 | 51 | @helper.poll_create 52 | def poll_create(event, context): 53 | logger.info("Got Create poll") 54 | return check_variant_store_status(event, context) 55 | 56 | 57 | @helper.poll_update 58 | def poll_update(event, context): 59 | logger.info("Got Update poll") 60 | return check_variant_store_status(event, context) 61 | 62 | 63 | @helper.poll_delete 64 | def poll_delete(event, context): 65 | logger.info("Got Delete poll") 66 | return "poll delete" 67 | 68 | 69 | def handler(event, context): 70 | helper(event, context) 71 | 72 | 73 | def list_omics_variant_store(variant_store_name): 74 | try: 75 | response = omics_client.get_variant_store(name=variant_store_name) 76 | except ClientError as e: 77 | raise Exception( "boto3 client error : " + e.__str__()) 78 | except Exception as e: 79 | raise Exception( "Unexpected error : " + e.__str__()) 80 | logger.info(response) 81 | 82 | def create_omics_variant_store(event, context): 83 | 84 | variant_store_name = event['ResourceProperties']['VariantStoreName'] 85 | reference_arn = event['ResourceProperties']['ReferenceArn'] 86 | reference_arn_dict = { 87 | "referenceArn": reference_arn 88 | } 89 | try: 90 | print("Attempt to create variant store") 91 | response = omics_client.create_variant_store( 92 | name=variant_store_name, 93 | reference=reference_arn_dict 94 | ) 95 | except ClientError as e: 96 | raise Exception( "boto3 client error : " + e.__str__()) 97 | except Exception as e: 98 | raise Exception( "Unexpected error : " + e.__str__()) 99 | logger.info(response) 100 | helper.Data.update({"VariantStoreId": response['id']}) 101 | 102 | def delete_omics_variant_store(event, context): 103 | 104 | variant_store_name = event['ResourceProperties']['VariantStoreName'] 105 | try: 106 | print("Attempt to delete variant store") 107 | response = omics_client.delete_variant_store( 108 | name=variant_store_name 109 | ) 110 | except ClientError as e: 111 | raise Exception( "boto3 client error : " + e.__str__()) 112 | except Exception as e: 113 | raise Exception( "Unexpected error : " + e.__str__()) 114 | logger.info(response) 115 | return helper.Data.get("VariantStoreId") 116 | 117 | def check_variant_store_status(event, context): 118 | variant_store_name = event['ResourceProperties']['VariantStoreName'] 119 | 120 | try: 121 | response = 
omics_client.get_variant_store(name=variant_store_name) 122 | except ClientError as e: 123 | raise Exception( "boto3 client error : " + e.__str__()) 124 | except Exception as e: 125 | raise Exception( "Unexpected error : " + e.__str__()) 126 | status = response['status'] 127 | 128 | if status in ['CREATING', 'UPDATING', 'IN_PROGRESS']: 129 | logger.info(status) 130 | return None 131 | else: 132 | if status in ['READY', 'COMPLETED', 'ACTIVE', 'COMPLETE']: 133 | logger.info(status) 134 | return True 135 | else: 136 | msg = f"Variant store; {variant_store_name} has status {status}, exiting" 137 | logger.info(msg) 138 | raise ValueError(msg) 139 | 140 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/import_annotation_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | from crhelper import CfnResource 15 | import logging 16 | import boto3 17 | from botocore.exceptions import ClientError 18 | 19 | logger = logging.getLogger(__name__) 20 | # Initialise the helper, all inputs are optional, this example shows the defaults 21 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 22 | 23 | # Initiate client 24 | try: 25 | print("Attempt to initiate client") 26 | omics_session = boto3.Session() 27 | omics_client = omics_session.client('omics') 28 | print("Attempt to initiate client complete") 29 | except Exception as e: 30 | helper.init_failure(e) 31 | 32 | 33 | @helper.create 34 | def create(event, context): 35 | logger.info("Got Create") 36 | import_annotation(event, context) 37 | 38 | 39 | @helper.update 40 | def update(event, context): 41 | logger.info("Got Update") 42 | import_annotation(event, context) 43 | 44 | 45 | @helper.delete 46 | def delete(event, context): 47 | logger.info("Got Delete") 48 | return "delete" 49 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 
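# crhelper polling contract, as used by the poll handlers below: returning None keeps the poller
# re-invoking this Lambda on a schedule, a truthy return marks the custom resource complete, and
# a raised exception fails the resource in CloudFormation.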
50 | 51 | @helper.poll_create 52 | def poll_create(event, context): 53 | logger.info("Got Create poll") 54 | return check_annotation_import_status(event, context) 55 | 56 | 57 | @helper.poll_update 58 | def poll_update(event, context): 59 | logger.info("Got Update poll") 60 | return check_annotation_import_status(event, context) 61 | 62 | 63 | @helper.poll_delete 64 | def poll_delete(event, context): 65 | logger.info("Got Delete poll") 66 | return "delete poll" 67 | 68 | def handler(event, context): 69 | helper(event, context) 70 | 71 | def import_annotation(event, context): 72 | omics_import_role_arn = event['ResourceProperties']['OmicsImportAnnotationRoleArn'] 73 | annotation_source_s3_uri = event['ResourceProperties']['AnnotationSourceS3Uri'] 74 | annotation_store_name = event['ResourceProperties']['AnnotationStoreName'] 75 | try: 76 | print(f"Attempt to import annotation file: {annotation_source_s3_uri} to store: {annotation_store_name}") 77 | response = omics_client.start_annotation_import_job( 78 | destinationName=annotation_store_name, 79 | roleArn=omics_import_role_arn, 80 | items=[{'source': annotation_source_s3_uri}] 81 | ) 82 | except ClientError as e: 83 | raise Exception( "boto3 client error : " + e.__str__()) 84 | except Exception as e: 85 | raise Exception( "Unexpected error : " + e.__str__()) 86 | logger.info(response) 87 | helper.Data.update({"AnnotationImportJobId": response['jobId']}) 88 | return True 89 | 90 | def check_annotation_import_status(event, context): 91 | annotation_import_job_id = helper.Data.get("AnnotationImportJobId") 92 | 93 | try: 94 | response = omics_client.get_annotation_import_job( 95 | jobId=annotation_import_job_id 96 | ) 97 | except ClientError as e: 98 | raise Exception( "boto3 client error : " + e.__str__()) 99 | except Exception as e: 100 | raise Exception( "Unexpected error : " + e.__str__()) 101 | status = response['status'] 102 | 103 | if status in ['SUBMITTED', 'IN_PROGRESS', 'RUNNING', 'CREATING', 'QUEUED']: 104 | logger.info(status) 105 | return None 106 | else: 107 | if status in ['READY', 'ACTIVE', 'COMPLETED', 'COMPLETE']: 108 | logger.info(status) 109 | return True 110 | else: 111 | msg = f"Annotation Import Job ID : {annotation_import_job_id} has status {status}, exiting" 112 | logger.info(msg) 113 | raise ValueError(msg) 114 | 115 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/import_reference_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. 
* 12 | # *********************************************************************************************************************/ 13 | 14 | from crhelper import CfnResource 15 | import logging 16 | import boto3 17 | from botocore.exceptions import ClientError 18 | 19 | logger = logging.getLogger(__name__) 20 | # Initialise the helper, all inputs are optional, this example shows the defaults 21 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 22 | 23 | # Initiate client 24 | try: 25 | print("Attempt to initiate client") 26 | omics_session = boto3.Session() 27 | omics_client = omics_session.client('omics') 28 | print("Attempt to initiate client complete") 29 | except Exception as e: 30 | helper.init_failure(e) 31 | 32 | 33 | @helper.create 34 | def create(event, context): 35 | logger.info("Got Create") 36 | import_reference(event, context) 37 | 38 | 39 | @helper.update 40 | def update(event, context): 41 | logger.info("Got Update") 42 | import_reference(event, context) 43 | 44 | 45 | @helper.delete 46 | def delete(event, context): 47 | logger.info("Got Delete") 48 | return "delete" 49 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 50 | 51 | @helper.poll_create 52 | def poll_create(event, context): 53 | logger.info("Got Create poll") 54 | return check_reference_import_status(event, context) 55 | 56 | 57 | @helper.poll_update 58 | def poll_update(event, context): 59 | logger.info("Got Update poll") 60 | return check_reference_import_status(event, context) 61 | 62 | 63 | @helper.poll_delete 64 | def poll_delete(event, context): 65 | logger.info("Got Delete poll") 66 | return "Poll delete" 67 | 68 | def handler(event, context): 69 | helper(event, context) 70 | 71 | def import_reference(event, context): 72 | reference_store_id = event['ResourceProperties']['ReferenceStoreId'] 73 | omics_import_role_arn = event['ResourceProperties']['OmicsImportReferenceRoleArn'] 74 | reference_source_s3_uri = event['ResourceProperties']['ReferenceSourceS3Uri'] 75 | reference_name = event['ResourceProperties']['ReferenceName'] 76 | try: 77 | print(f"Attempt to import reference: {reference_source_s3_uri} to store: {reference_store_id}") 78 | response = omics_client.start_reference_import_job( 79 | referenceStoreId=reference_store_id, 80 | roleArn=omics_import_role_arn, 81 | sources=[{'sourceFile': reference_source_s3_uri, 'name': reference_name}] 82 | ) 83 | except ClientError as e: 84 | raise Exception( "boto3 client error : " + e.__str__()) 85 | except Exception as e: 86 | raise Exception( "Unexpected error : " + e.__str__()) 87 | logger.info(response) 88 | helper.Data.update({"ReferenceImportJobId": response['id']}) 89 | helper.Data.update({"ReferenceStoreId": response['referenceStoreId']}) 90 | return True 91 | 92 | def get_reference_arn(reference_store_id, reference_name): 93 | try: 94 | response = omics_client.list_references( 95 | referenceStoreId=reference_store_id, 96 | filter={'name': reference_name} 97 | ) 98 | except ClientError as e: 99 | raise Exception( "boto3 client error : " + e.__str__()) 100 | except Exception as e: 101 | raise Exception( "Unexpected error : " + e.__str__()) 102 | return response['references'][0]['arn'] 103 | 104 | def check_reference_import_status(event, context): 105 | reference_store_id = helper.Data.get("ReferenceStoreId") 106 | reference_import_job_id = helper.Data.get("ReferenceImportJobId") 107 | 108 | try: 109 | response = omics_client.get_reference_import_job( 110 | 
id=reference_import_job_id, 111 | referenceStoreId=reference_store_id 112 | ) 113 | except ClientError as e: 114 | raise Exception( "boto3 client error : " + e.__str__()) 115 | except Exception as e: 116 | raise Exception( "Unexpected error : " + e.__str__()) 117 | status = response['status'] 118 | 119 | if status in ['SUBMITTED', 'IN_PROGRESS', 'RUNNING']: 120 | logger.info(status) 121 | return None 122 | else: 123 | if status in ['READY', 'ACTIVE', 'COMPLETED', 'COMPLETE']: 124 | logger.info(status) 125 | helper.Data['Arn'] = get_reference_arn( 126 | reference_store_id, 127 | event['ResourceProperties']['ReferenceName'] 128 | ) 129 | return True 130 | else: 131 | msg = f"Reference store: {reference_store_id} has status {status}, exiting" 132 | logger.info(msg) 133 | raise ValueError(msg) 134 | 135 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/omics/import_variant_lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | from crhelper import CfnResource 15 | import logging 16 | import boto3 17 | from botocore.exceptions import ClientError 18 | 19 | logger = logging.getLogger(__name__) 20 | # Initialise the helper, all inputs are optional, this example shows the defaults 21 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 22 | 23 | # Initiate client 24 | try: 25 | print("Attempt to initiate client") 26 | omics_session = boto3.Session() 27 | omics_client = omics_session.client('omics') 28 | print("Attempt to initiate client complete") 29 | except Exception as e: 30 | helper.init_failure(e) 31 | 32 | 33 | @helper.create 34 | def create(event, context): 35 | logger.info("Got Create") 36 | start_import_variants_job(event, context) 37 | 38 | 39 | @helper.update 40 | def update(event, context): 41 | logger.info("Got Update") 42 | start_import_variants_job(event, context) 43 | 44 | 45 | @helper.delete 46 | def delete(event, context): 47 | logger.info("Got Delete") 48 | return "delete" 49 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 
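# Note: crhelper persists helper.Data between the initial create/update invocation and the poll
# invocations below, which is how the VariantImportJobId recorded in start_import_variants_job
# remains available to get_variant_import_job_status on later polls.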
50 | 51 | 52 | @helper.poll_create 53 | def poll_create(event, context): 54 | logger.info("Got Create poll") 55 | return get_variant_import_job_status(event, context) 56 | 57 | 58 | @helper.poll_update 59 | def poll_update(event, context): 60 | logger.info("Got Update poll") 61 | return get_variant_import_job_status(event, context) 62 | 63 | 64 | @helper.poll_delete 65 | def poll_delete(event, context): 66 | logger.info("Got Delete poll") 67 | return "Poll delete" 68 | 69 | 70 | def handler(event, context): 71 | helper(event, context) 72 | 73 | 74 | def start_import_variants_job(event, context): 75 | variant_store_name = event['ResourceProperties']['VariantStoreName'] 76 | role_arn = event['ResourceProperties']['OmicsImportVariantRoleArn'] 77 | variant_items = [{ 78 | "source": event['ResourceProperties']['VcfS3Uri'] 79 | }] 80 | try: 81 | print("Attempt to start variant import job") 82 | response = omics_client.start_variant_import_job( 83 | destinationName=variant_store_name, 84 | roleArn=role_arn, 85 | items=variant_items 86 | ) 87 | except ClientError as e: 88 | raise Exception( "boto3 client error : " + e.__str__()) 89 | except Exception as e: 90 | raise Exception( "Unexpected error : " + e.__str__()) 91 | logger.info(response) 92 | helper.Data.update({"VariantImportJobId": response['jobId']}) 93 | 94 | def get_variant_import_job_status(event, context): 95 | variant_store_name = event['ResourceProperties']['VariantStoreName'] 96 | variant_import_job_id = helper.Data.get('VariantImportJobId') 97 | 98 | try: 99 | response = omics_client.get_variant_import_job(jobId=variant_import_job_id) 100 | except ClientError as e: 101 | raise Exception( "boto3 client error : " + e.__str__()) 102 | except Exception as e: 103 | raise Exception( "Unexpected error : " + e.__str__()) 104 | status = response['status'] 105 | 106 | if status in ['CREATING', 'UPDATING', 'IN_PROGRESS', 'QUEUED', 'SUBMITTED']: 107 | logger.info(status) 108 | return None 109 | else: 110 | if status in ['READY', 'COMPLETED', 'ACTIVE', 'COMPLETE']: 111 | logger.info(status) 112 | return True 113 | else: 114 | msg = f"Variant import job {variant_import_job_id} in\ 115 | variant store {variant_store_name} has status {status}, exiting" 116 | logger.info(msg) 117 | raise ValueError(msg) 118 | 119 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/scripts/create_tcga_summary.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import pandas as pd 3 | import io 4 | import re 5 | import time 6 | import sys 7 | import json 8 | 9 | from awsglue.utils import getResolvedOptions 10 | 11 | args = getResolvedOptions(sys.argv, ['database', 'bucket', 'workgroup']) 12 | 13 | session = boto3.Session() 14 | 15 | def start_query_execution(client, params): 16 | response = client.start_query_execution( 17 | QueryString=params["query"], 18 | QueryExecutionContext={ 19 | 'Database': params['database'] 20 | }, 21 | ResultConfiguration={ 22 | 'OutputLocation': 's3://' + params['bucket'] + '/' + params['path'] 23 | }, 24 | WorkGroup=params['workgroup'] 25 | ) 26 | return response 27 | 28 | def run_query(session, params, max_execution=6): 29 | print(json.dumps(params)) 30 | print(params['query']) 31 | client = session.client('athena') 32 | execution = start_query_execution(client, params) 33 | execution_id = execution['QueryExecutionId'] 34 | state = 'RUNNING' 35 | 36 | while max_execution > 0 and state in ['RUNNING']: 37 | 
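        # Poll the Athena query every 5 seconds for at most max_execution attempts
        # (6 by default, i.e. roughly 30 seconds) while it remains in the RUNNING state.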
time.sleep(5)
38 |         max_execution = max_execution - 1
39 |         response = client.get_query_execution(QueryExecutionId=execution_id)
40 |         print(response)
41 | 
42 |         if 'QueryExecution' in response and \
43 |                 'Status' in response['QueryExecution'] and \
44 |                 'State' in response['QueryExecution']['Status']:
45 |             state = response['QueryExecution']['Status']['State']
46 |             if state == 'FAILED':
47 |                 raise Exception('Athena query execution failed: ' + str(response['QueryExecution']['Status']))
48 |             elif state == 'SUCCEEDED':
49 |                 response = client.get_query_results(QueryExecutionId=execution_id)
50 |                 print(response)
51 |                 if 'ResultSet' in response and 'Rows' in response['ResultSet']:
52 |                     return response['ResultSet']['Rows']
53 |                 else:
54 |                     return None
55 | 
56 | # If the tcga_summary table exists, remove it along with its data
57 | rows = run_query(session, params={
58 |     'workgroup': args['workgroup'],
59 |     'database': args['database'],
60 |     'bucket': args['bucket'],
61 |     'path': 'results/create-summary',
62 |     'query': "DROP TABLE IF EXISTS tcga_summary;"
63 | })
64 | print(rows)
65 | 
66 | s3 = boto3.resource('s3')
67 | s3_bucket = s3.Bucket(args['bucket'])
68 | s3_bucket.objects.filter(Prefix="tcga-summary/").delete()
69 | 
70 | # This is the main query to create the tcga_summary table. It consists
71 | # of a SELECT from the clinical_patient table, with a number of joins
72 | # which count the number of records in the other tables in the data
73 | # set.
74 | QUERY = """
75 | CREATE TABLE tcga_summary
76 | WITH (
77 |     external_location = 's3://{bucket}/tcga-summary/',
78 |     format = 'PARQUET',
79 |     parquet_compression = 'SNAPPY'
80 | )
81 | AS
82 | SELECT clinical_patient.bcr_patient_barcode as patient_id,
83 |        QTY_IMGS.quantity as num_images,
84 |        QTY_IMG_SER.quantity as num_image_series,
85 |        QTY_MUT.quantity as num_mutation_records,
86 |        QTY_EXP.quantity as num_expression_records,
87 |        QTY_CNV.quantity as num_cnv_records,
88 |        QTY_CLIN_DRUG.quantity as num_clin_drug_records,
89 |        QTY_CLIN_RAD.quantity as num_clin_rad_records,
90 |        QTY_CLIN_FOL.quantity as num_clin_fol_records,
91 |        QTY_CLIN_OMF.quantity as num_clin_omf_records,
92 |        QTY_CLIN_NTE.quantity as num_clin_nte_records
93 | FROM clinical_patient
94 | 
95 | LEFT JOIN
96 |     ( SELECT COUNT(tcia_patients.patientid) AS quantity,
97 |              tcia_patients.patientid
98 |       FROM tcia_patients
99 |       GROUP BY tcia_patients.patientid
100 |     ) AS QTY_IMGS
101 | ON clinical_patient.bcr_patient_barcode = QTY_IMGS.patientid
102 | 
103 | LEFT JOIN
104 |     ( SELECT COUNT(tcia_image_series.patientid) AS quantity,
105 |              tcia_image_series.patientid
106 |       FROM tcia_image_series
107 |       GROUP BY tcia_image_series.patientid
108 |     ) AS QTY_IMG_SER
109 | ON clinical_patient.bcr_patient_barcode = QTY_IMG_SER.patientid
110 | 
111 | LEFT JOIN
112 |     ( SELECT COUNT(tcga_mutation.submitter_id) AS quantity,
113 |              tcga_mutation.submitter_id
114 |       FROM tcga_mutation
115 |       GROUP BY tcga_mutation.submitter_id
116 |     ) AS QTY_MUT
117 | ON clinical_patient.bcr_patient_barcode = QTY_MUT.submitter_id
118 | 
119 | -- The expression data is stored in a unique format - each patient ID is a column in one of two tables.
120 | -- In order to query this, we use the `information_schema` special table which contains the metadata
121 | -- about all tables in the database. This special table is first filtered and transformed via a common
122 | -- table expression (CTE) and then grouped and joined to match the results of the other tables.
123 | LEFT JOIN 124 | ( WITH expression_patients AS ( 125 | SELECT upper(substring(column_name, 1, 12)) AS patientid 126 | FROM information_schema.columns 127 | WHERE table_schema = '{database_name}' 128 | AND table_name LIKE 'expression_tcga_%' 129 | AND upper(column_name) LIKE 'TCGA-%' 130 | ) 131 | SELECT COUNT(expression_patients.patientid) AS quantity, 132 | expression_patients.patientid 133 | FROM expression_patients 134 | GROUP BY expression_patients.patientid 135 | ) AS QTY_EXP 136 | ON clinical_patient.bcr_patient_barcode = QTY_EXP.patientid 137 | 138 | LEFT JOIN 139 | ( SELECT COUNT(tcga_cnv.submitter_id[1]) AS quantity, 140 | tcga_cnv.submitter_id[1] as submitter_id 141 | FROM tcga_cnv 142 | WHERE copy_number IS NOT NULL 143 | GROUP BY tcga_cnv.submitter_id[1] 144 | ) AS QTY_CNV 145 | ON clinical_patient.bcr_patient_barcode = QTY_CNV.submitter_id 146 | 147 | LEFT JOIN 148 | ( SELECT COUNT(clinical_drug.bcr_patient_barcode) AS quantity, 149 | clinical_drug.bcr_patient_barcode 150 | FROM clinical_drug 151 | GROUP BY clinical_drug.bcr_patient_barcode 152 | ) AS QTY_CLIN_DRUG 153 | ON clinical_patient.bcr_patient_barcode = QTY_CLIN_DRUG.bcr_patient_barcode 154 | 155 | LEFT JOIN 156 | ( SELECT COUNT(clinical_radiation.bcr_patient_barcode) AS quantity, 157 | clinical_radiation.bcr_patient_barcode 158 | FROM clinical_radiation 159 | GROUP BY clinical_radiation.bcr_patient_barcode 160 | ) AS QTY_CLIN_RAD 161 | ON clinical_patient.bcr_patient_barcode = QTY_CLIN_RAD.bcr_patient_barcode 162 | 163 | LEFT JOIN 164 | ( SELECT COUNT(clinical_follow_up_v1_0.bcr_patient_barcode) AS quantity, 165 | clinical_follow_up_v1_0.bcr_patient_barcode 166 | FROM clinical_follow_up_v1_0 167 | GROUP BY clinical_follow_up_v1_0.bcr_patient_barcode 168 | ) AS QTY_CLIN_FOL 169 | ON clinical_patient.bcr_patient_barcode = QTY_CLIN_FOL.bcr_patient_barcode 170 | 171 | LEFT JOIN 172 | ( SELECT COUNT(clinical_omf_v4_0.bcr_patient_barcode) AS quantity, 173 | clinical_omf_v4_0.bcr_patient_barcode 174 | FROM clinical_omf_v4_0 175 | GROUP BY clinical_omf_v4_0.bcr_patient_barcode 176 | ) AS QTY_CLIN_OMF 177 | ON clinical_patient.bcr_patient_barcode = QTY_CLIN_OMF.bcr_patient_barcode 178 | 179 | -- The NTE data is split across two tables, so in order to have one quantity for both tables, we union 180 | -- the results of the same query together. 
181 | 182 | LEFT JOIN 183 | ( SELECT COUNT(clinical_nte_tcga_luad.bcr_patient_barcode) AS quantity, 184 | clinical_nte_tcga_luad.bcr_patient_barcode 185 | FROM clinical_nte_tcga_luad 186 | GROUP BY clinical_nte_tcga_luad.bcr_patient_barcode 187 | UNION ALL 188 | SELECT COUNT(clinical_nte_tcga_lusc.bcr_patient_barcode) AS quantity, 189 | clinical_nte_tcga_lusc.bcr_patient_barcode 190 | FROM clinical_nte_tcga_lusc 191 | GROUP BY clinical_nte_tcga_lusc.bcr_patient_barcode 192 | ) AS QTY_CLIN_NTE 193 | ON clinical_patient.bcr_patient_barcode = QTY_CLIN_NTE.bcr_patient_barcode 194 | """.format(database_name=args['database'], bucket=args['bucket']) 195 | 196 | rows = run_query(session, params={ 197 | 'workgroup': args['workgroup'], 198 | 'database': args['database'], 199 | 'bucket': args['bucket'], 200 | 'path': 'results/create-summary', 201 | 'query': QUERY 202 | }) 203 | print(rows) 204 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/scripts/image_api_glue.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import datetime 3 | import requests 4 | 5 | from awsglue.utils import getResolvedOptions 6 | from pyspark.context import SparkContext 7 | from awsglue.context import GlueContext 8 | from awsglue.job import Job 9 | 10 | from pyspark.sql.functions import udf, struct, explode 11 | from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType, ArrayType 12 | 13 | sc = SparkContext.getOrCreate() 14 | glueContext = GlueContext(sc) 15 | spark = glueContext.spark_session 16 | 17 | API = "https://services.cancerimagingarchive.net/services/v4/TCIA/query/" 18 | 19 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'project', 'output_bucket']) 20 | project_id = args['project'] 21 | output_bucket = args['output_bucket'] 22 | 23 | 24 | def get_patient_study(api_url, project, patient_id): 25 | parameters = {"format": "json", "Collection": project, "PatientID": patient_id} 26 | patient_study_url = api_url + "getPatientStudy" 27 | r = requests.get(patient_study_url, parameters) 28 | print(f"[INFO] GET request from:\n '{r.url}'...") 29 | return r.json() 30 | 31 | 32 | def get_all_patients(api_url, project): 33 | parameters = {"format": "json", "Collection": project} 34 | url = api_url + "getPatient" 35 | r = requests.get(url, parameters) 36 | print(f"[INFO] GET request from:\n '{r.url}'...") 37 | return r.json() 38 | 39 | 40 | def get_series(api_url: str, patient_id: str, study_instance_uid: str) -> list: 41 | """Parameter: `patient_id` and `study_instance_uid` are strings from 42 | an entry in `build_patient_metadata`'s returned list. Returns a 43 | list of all metadata and SeriesInstanceUIDs associated with 44 | PatientID and StudyInstanceUID. There may be many 45 | SeriesInstanceUIDs associated wtih a StudyInstanceUID, and many 46 | StudyInstanceUIDs associated wtih each Patient. 47 | 48 | """ 49 | parameters = { 50 | "format": "json", 51 | "PatientID": patient_id, 52 | "StudyInstanceUID": study_instance_uid 53 | } 54 | url = api_url + "getSeries" 55 | r = requests.get(url, parameters) 56 | print(f"[INFO] GET request from:\n '{r.url}'...") 57 | return r.json() 58 | 59 | 60 | def fmt_age(patient_dict): 61 | age = patient_dict.get("PatientAge", "") 62 | if not age: 63 | return None 64 | try: 65 | return int(age.replace("Y", "").replace("y", "")) 66 | except ValueError as e: 67 | print(f"[WARNING] Problem parsing: '{age}' to an integer. 
Exception" 68 | f" is '{e}'") 69 | return None 70 | 71 | 72 | def fmt_date(datestr): 73 | if datestr is None: 74 | return None 75 | return datetime.datetime.strptime(datestr, "%Y-%m-%d").date() 76 | 77 | 78 | metadata_udf_schema = ArrayType(StructType([ 79 | StructField("Collection", StringType(), False), 80 | StructField("PatientID", StringType(), False), 81 | StructField("PatientSex", StringType(), False), 82 | StructField("StudyDate", DateType(), False), 83 | StructField("PatientAge", IntegerType(), True), 84 | StructField("SeriesCount", IntegerType(), False), 85 | StructField("StudyInstanceUID", StringType(), False), 86 | StructField("StudyDescription", StringType(), True) 87 | ])) 88 | 89 | @udf(returnType=metadata_udf_schema) 90 | def build_patient_metadata_udf(collection, patient_id): 91 | """builds the data table for a study given a patient.""" 92 | r = [] 93 | for patient_study in get_patient_study(API, collection, patient_id): 94 | r.append(( 95 | patient_study["Collection"], 96 | patient_study["PatientID"], 97 | patient_study.get("PatientSex", "unknown"), 98 | fmt_date(patient_study["StudyDate"]), 99 | fmt_age(patient_study), 100 | patient_study["SeriesCount"], 101 | patient_study["StudyInstanceUID"], 102 | patient_study.get("StudyDescription", "") 103 | )) 104 | return r 105 | 106 | 107 | image_udf_schema = ArrayType(StructType([ 108 | StructField("Collection", StringType(), False), 109 | StructField("PatientID", StringType(), False), 110 | StructField("SeriesInstanceUID", StringType(), False), 111 | StructField("ImageCount", IntegerType(), False), 112 | StructField("BodyPartExamined", StringType(), True), 113 | StructField("Modality", StringType(), True), 114 | StructField("Manufacturer", StringType(), True), 115 | StructField("ManufacturerModelName", StringType(), True), 116 | StructField("ProtocolName", StringType(), True), 117 | StructField("SeriesDate", DateType(), True), 118 | StructField("SeriesDescription", StringType(), True), 119 | StructField("SeriesNumber", StringType(), True), 120 | StructField("SoftwareVersions", StringType(), True), 121 | StructField("Visibility", StringType(), True) 122 | ])) 123 | 124 | @udf(returnType=image_udf_schema) 125 | def build_image_metadata_udf(collection, patient_id, study_instance_uid): 126 | r = [] 127 | for series in get_series(API, patient_id, study_instance_uid): 128 | r.append(( 129 | series['Collection'], 130 | series['PatientID'], 131 | series['SeriesInstanceUID'], 132 | series['ImageCount'], 133 | series.get('BodyPartExamined'), 134 | series.get('Modality'), 135 | series.get('Manufacturer'), 136 | series.get('ManufacturerModelName'), 137 | series.get('ProtocolName'), 138 | fmt_date(series.get('SeriesDate')), 139 | series.get('SeriesDescription'), 140 | series.get('SeriesNumber'), 141 | series.get('SoftwareVersions'), 142 | series.get('Visibility') 143 | )) 144 | return r 145 | 146 | 147 | 148 | ## Step 1: Read a JSON structure and create a dataframe. 149 | # 150 | # This calls get_all_patients, passes it through the sc.parallelize() 151 | # method to turn it into an RDD, and then uses spark.read.json() to 152 | # turn the array into a structure with rows and columns. The resulting 153 | # structure will mostly just be a list of patients along with their 154 | # associated project ID. 
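# --- Illustrative aside (editor's sketch, not part of image_api_glue.py) ---
# A minimal, local sketch of the "JSON payload -> DataFrame" step performed just
# below. The hand-written payload stands in for the TCIA getPatient response;
# only the Collection and PatientID fields (which this script selects later) are
# real, everything else about the sketch is assumed. Serialising the payload with
# json.dumps() makes the parsing explicit; the job below passes the parsed list
# straight to spark.read.json(), which yields the same one-row-per-object result.

import json
from pyspark.sql import SparkSession

sketch_spark = SparkSession.builder.master("local[1]").appName("tcia-sketch").getOrCreate()

sample_payload = [
    {"Collection": "TCGA-LUAD", "PatientID": "TCGA-00-0001"},  # placeholder IDs
    {"Collection": "TCGA-LUAD", "PatientID": "TCGA-00-0002"},
]

# parallelize([...]) wraps the payload in a one-element RDD; spark.read.json()
# then produces one row per JSON object and one column per key.
sample_df = sketch_spark.read.json(
    sketch_spark.sparkContext.parallelize([json.dumps(sample_payload)])
)
sample_df.show()  # two rows with columns: Collection, PatientID
# --- end aside ---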
155 | 156 | df = spark.read.json(sc.parallelize([get_all_patients(API, project_id)])) 157 | print(f'Patients: {df.count()} Columns: {df.columns}') 158 | assert df.count() > 0, 'Error: no rows retrieved from API' 159 | 160 | df = df.repartition(8) 161 | 162 | ## Step 2: Apply a UDF to the dataframe to get data on a per-patient level. 163 | # 164 | # We need to make one API request per patient to get the full set of 165 | # metadata. By doing it through a UDF, PySpark will parallelize the 166 | # requests across all of the allocated workers, which will improve 167 | # performance. 168 | 169 | # Apply the build_patient_metadata_udf to every row in df, and store 170 | # the result in the Result column. 171 | patient_df = df.withColumn( 172 | 'Result', explode(build_patient_metadata_udf('Collection', 'PatientID')) 173 | ) 174 | patient_df.printSchema() 175 | 176 | # The Result column now has a nested structure with the multiple 177 | # fields returned from build_patient_metadata_udf. We can flatten that 178 | # out by using the select call and dot-syntax to select the nested 179 | # columns. 180 | patient_df = patient_df.select( 181 | "Collection", "PatientID", "PatientName", "PatientSex", 182 | "Result.StudyDate", "Result.PatientAge", "Result.SeriesCount", 183 | "Result.StudyInstanceUID", "Result.StudyDescription" 184 | ) 185 | patient_df.printSchema() 186 | 187 | ## Step 3: Write the output to S3 in Parquet format. 188 | # 189 | # This is straightforward - define an S3 destination as a prefix (aka 190 | # folder) and call the patient_df.write.parquet method. The 191 | # mode("overwrite") call ensures that we are not going to append to 192 | # data already present in that prefix. 193 | 194 | dest = f's3://{output_bucket}/tcia-metadata/tcia-patients/{project_id}' 195 | patient_df.write.mode("overwrite").parquet(dest) 196 | 197 | 198 | ## Now we're going to repeat this process for the images 199 | 200 | # Get the relevant parts of the patient_df to start building the image_df 201 | image_df = patient_df.select("Collection", "PatientID", "StudyInstanceUID") 202 | 203 | # Apply the UDF 204 | image_df = image_df.withColumn( 205 | 'Result', explode(build_image_metadata_udf('Collection', 'PatientID', 206 | 'StudyInstanceUID')) 207 | ) 208 | image_df.printSchema() 209 | 210 | image_df = image_df.select( 211 | "Collection", "PatientID", "StudyInstanceUID", 212 | "Result.SeriesInstanceUID", "Result.ImageCount", "Result.BodyPartExamined", 213 | "Result.Modality", "Result.Manufacturer", "Result.ManufacturerModelName", 214 | "Result.ProtocolName", "Result.SeriesDate", "Result.SeriesDescription", 215 | "Result.SeriesNumber", "Result.SoftwareVersions", "Result.Visibility" 216 | ) 217 | image_df.printSchema() 218 | 219 | dest = f's3://{output_bucket}/tcia-metadata/tcia-image-series/{project_id}' 220 | image_df.write.mode("overwrite").parquet(dest) 221 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/scripts/run_tests.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import pandas as pd 3 | import io 4 | import re 5 | import time 6 | import sys 7 | 8 | from awsglue.utils import getResolvedOptions 9 | 10 | args = getResolvedOptions(sys.argv, ['database', 'bucket']) 11 | 12 | session = boto3.Session() 13 | 14 | def start_query_execution(client, params): 15 | 16 | response = client.start_query_execution( 17 | QueryString=params["query"], 18 | QueryExecutionContext={ 19 | 'Database': 
params['database'] 20 | }, 21 | ResultConfiguration={ 22 | 'OutputLocation': 's3://' + params['bucket'] + '/' + params['path'] 23 | } 24 | ) 25 | return response 26 | 27 | def run_query(session, params, max_execution = 5): 28 | client = session.client('athena') 29 | execution = start_query_execution(client, params) 30 | execution_id = execution['QueryExecutionId'] 31 | state = 'RUNNING' 32 | 33 | while (max_execution > 0 and state in ['RUNNING']): 34 | max_execution = max_execution - 1 35 | response = client.get_query_execution(QueryExecutionId = execution_id) 36 | 37 | if 'QueryExecution' in response and \ 38 | 'Status' in response['QueryExecution'] and \ 39 | 'State' in response['QueryExecution']['Status']: 40 | state = response['QueryExecution']['Status']['State'] 41 | if state == 'FAILED': 42 | return None 43 | elif state == 'SUCCEEDED': 44 | response = client.get_query_results(QueryExecutionId=execution_id) 45 | if 'ResultSet' in response and 'Rows' in response['ResultSet']: 46 | return response['ResultSet']['Rows'] 47 | else: 48 | return None 49 | time.sleep(1) 50 | 51 | return None 52 | 53 | rows = run_query(session, params={ 54 | 'database': args['database'], 55 | 'bucket': args['bucket'], 56 | 'path': 'results/test/annotation', 57 | 'query': 'select * from clinvar limit 10;' 58 | }) 59 | assert len(rows) == 10 + 1 60 | 61 | rows = run_query(session, params={ 62 | 'database': args['database'], 63 | 'bucket': args['bucket'], 64 | 'path': 'results/test/vcf', 65 | 'query': 'select * from vcf limit 10;' 66 | }) 67 | assert len(rows) == 10 + 1 68 | 69 | rows = run_query(session, params={ 70 | 'database': args['database'], 71 | 'bucket': args['bucket'], 72 | 'path': 'results/test/cohort', 73 | 'query': 'select * from onekg_chr22_by_sample limit 10;' 74 | }) 75 | assert len(rows) == 10 + 1 -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/scripts/tcga_etl_common_job.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | # 4 | # Permission is hereby granted, free of charge, to any person 5 | # obtaining a copy of this software and associated documentation files 6 | # (the "Software"), to deal in the Software without restriction, 7 | # including without limitation the rights to use, copy, modify, merge, 8 | # publish, distribute, sublicense, and/or sell copies of the Software, 9 | # and to permit persons to whom the Software is furnished to do so. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 14 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 15 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 16 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 17 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | # SOFTWARE. 
19 | #
20 | ### Glue job to fetch and aggregate TCGA data (gene expression, mutation, copy number, and clinical)
21 | ## Command line arguments:
22 | ##   --project - the TCGA project (e.g. TCGA-BRCA for breast cancer)
23 | ##   --output_bucket - the output bucket where results are written
24 | ##   --data_type - the type of data to be retrieved
25 | 
26 | import sys, os
27 | import re
28 | import boto3
29 | import requests
30 | import json
31 | import pandas as pd
32 | import gzip
33 | 
34 | from awsglue.transforms import *
35 | from awsglue.utils import getResolvedOptions
36 | from pyspark.context import SparkContext
37 | from awsglue.context import GlueContext
38 | from awsglue.job import Job
39 | from pyspark.sql.types import Row, StringType, IntegerType, ArrayType, \
40 |     StructType, DoubleType, BooleanType, DateType
41 | from pyspark.sql.functions import input_file_name, concat, col
42 | from pyspark.sql.functions import first, last
43 | from pyspark.sql.types import IntegerType
44 | from pyspark.sql.functions import udf, struct, explode
45 | 
46 | sc = SparkContext.getOrCreate()
47 | glueContext = GlueContext(sc)
48 | spark = glueContext.spark_session
49 | job = Job(glueContext)
50 | 
51 | 
52 | ## Get the argument list
53 | 
54 | args = getResolvedOptions(
55 |     sys.argv,
56 |     ['JOB_NAME', 'project', 'data_type', 'output_bucket']
57 | )
58 | 
59 | ## The GDC files endpoint for file metadata and the GDC data endpoint used to download file contents
60 | 
61 | files_endpt = 'https://api.gdc.cancer.gov/files'
62 | data_endpt = 'https://api.gdc.cancer.gov/data'
63 | 
64 | s3 = boto3.resource('s3')
65 | output_bucket = args['output_bucket']
66 | 
67 | project_id = args['project']
68 | 
69 | 
70 | def get_data(uuid, sample_submitter_id):
71 |     """Download a file from the GDC data endpoint and stage it in S3.
72 | 
73 |     Inputs to this method are the UUID and submitter ID from the GDC
74 |     endpoint query; the return value is the S3 path of the staged copy.
75 | 
76 |     """
77 |     s3c = boto3.client('s3')
78 |     query_response = requests.get(data_endpt + "/" + uuid)
79 |     if 'Content-Disposition' in query_response.headers:
80 |         filename = re.findall(
81 |             r'filename=(.+)',
82 |             query_response.headers['content-disposition']
83 |         )[0]
84 |         filename = filename.strip('"\'')
85 |     else:
86 |         filename = uuid
87 |     key = f'tcga-raw-objects-by-uuid/{uuid}/{filename}'
88 |     s3c.put_object(Bucket=output_bucket, Key=key, Body=query_response.content)
89 |     return f's3://{output_bucket}/{key}'
90 | 
91 | 
92 | ### Step 1: Query the GDC endpoint to retrieve the list of files
93 | ### associated with the specified project.
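# --- Illustrative aside (editor's sketch, not part of tcga_etl_common_job.py) ---
# A compact, standalone version of the GDC /files query assembled below, for one
# project and data type. The field and filter names are the same ones the job
# builds further down; the response handling only shows the "data" -> "hits"
# shape the job relies on. Values such as the project and the size are example
# choices, not requirements.

import json
import requests

sketch_files_endpt = "https://api.gdc.cancer.gov/files"

sketch_filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": "cases.project.project_id",
                                 "value": ["TCGA-LUAD"]}},
        {"op": "in", "content": {"field": "files.data_type",
                                 "value": ["Gene Expression Quantification"]}},
    ],
}

sketch_params = {
    "filters": json.dumps(sketch_filters),    # GET requests take the filters as a JSON string
    "fields": "file_name,cases.submitter_id", # comma-separated field list, as built below
    "format": "JSON",
    "size": 10,
}

sketch_response = requests.get(sketch_files_endpt, params=sketch_params)
sketch_hits = sketch_response.json()["data"]["hits"]   # one dict per matching file
print(f"{len(sketch_hits)} files returned; first id: "
      f"{sketch_hits[0]['id'] if sketch_hits else 'n/a'}")
# --- end aside ---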
94 | 95 | # Build a comma-separated list of fields 96 | fields = [ 97 | "file_name", 98 | "cases.primary_site", 99 | "cases.case_id", 100 | "cases.project.project_id", 101 | "cases.submitter_id", 102 | "cases.samples.submitter_id", 103 | "cases.samples.sample_id", 104 | 105 | ] 106 | fields = ','.join(fields) 107 | 108 | size = 5000 109 | data_type = args['data_type'] 110 | 111 | # Define the core filters for the query 112 | filters = { 113 | "op": "and", 114 | "content":[{ 115 | "op": "in", 116 | "content": { 117 | "field": "cases.project.project_id", 118 | "value": [project_id] 119 | } 120 | }, { 121 | "op": "in", 122 | "content": { 123 | "field": "files.data_type", 124 | "value": [data_type] 125 | } 126 | }] 127 | } 128 | 129 | # Add additional filters on a per-data-type basis 130 | if data_type == 'Gene Expression Quantification': 131 | exp_wftype = 'STAR-Counts' 132 | exp_wftype_filter = 'STAR - Counts' 133 | 134 | filters['content'].append({ 135 | "op": "in", 136 | "content": { 137 | "field": "files.analysis.workflow_type", 138 | "value": [exp_wftype_filter] 139 | } 140 | }) 141 | 142 | elif data_type == 'Gene Level Copy Number': 143 | filters['content'].append({ 144 | "op": "in", 145 | "content": { 146 | "field": "files.data_category", 147 | "value": ["Copy Number Variation"] 148 | } 149 | }) 150 | 151 | elif data_type == 'Masked Somatic Mutation': 152 | filters['content'].extend([{ 153 | "op": "in", 154 | "content": { 155 | "field": "files.data_category", 156 | "value": ["Simple Nucleotide Variation"] 157 | } 158 | }, { 159 | "op": "in", 160 | "content": { 161 | "field": "files.data_format", 162 | "value": ["MAF"] 163 | } 164 | }]) 165 | 166 | elif data_type == 'Clinical Supplement': 167 | filters['content'].extend([{ 168 | "op": "in", 169 | "content": { 170 | "field": "files.data_category", 171 | "value": ["Clinical"] 172 | } 173 | }, { 174 | "op": "in", 175 | "content": { 176 | "field": "files.data_format", 177 | "value": ["BCR Biotab"] 178 | } 179 | }]) 180 | 181 | # With a GET request, the filters parameter needs to be converted 182 | # from a dictionary to JSON-formatted string 183 | 184 | params = { 185 | "filters": json.dumps(filters), 186 | "fields": fields, 187 | "format": "JSON", 188 | "size": size 189 | } 190 | 191 | # query the files endpoint and get back JSON response 192 | query_response = requests.get(files_endpt, params=params) 193 | json_response = json.loads( 194 | query_response.content.decode("utf-8"))["data"]["hits"] 195 | 196 | print(len(json_response)) 197 | 198 | 199 | ### Step 2: Read the query response into a Spark DataFrame, and then 200 | ### use Spark parallelization to resolve the file UUID to an S3 path 201 | ### via the UDF defined above. 202 | 203 | # Parallel read of JSON object and repartition to distribute to all workers 204 | df = spark.read.json(sc.parallelize([json_response])) 205 | df2 = df.repartition(8) 206 | 207 | if data_type == 'Gene Expression Quantification': 208 | uf = df2.select( 209 | "id", 210 | explode(df2.cases.samples) 211 | ).toDF( 212 | "id", "samples" 213 | ).select( 214 | 'id','samples.submitter_id','samples.sample_id' 215 | ) 216 | 217 | else: 218 | uf = df2.select( 219 | 'id', 'cases.submitter_id' 220 | ) 221 | 222 | # Call the get_data() function defined above as a Spark user-defined 223 | # function (UDF) to convert the UUIDs stored in the 'id' column into 224 | # S3 paths. This is done using this mechanism in order to parallelize 225 | # the process, improving throughput. 
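# --- Illustrative aside (editor's sketch, not part of tcga_etl_common_job.py) ---
# Why wrapping get_data() in a UDF parallelises the downloads: udf() turns a
# plain Python function into a column expression that Spark evaluates row by row
# on the executors, so each partition of `uf` makes its own GDC/S3 calls
# concurrently instead of looping on the driver. A self-contained sketch of the
# same wiring, with a stand-in function that only fabricates a path (no AWS or
# GDC calls; the bucket name is made up):

from pyspark.sql import SparkSession
from pyspark.sql.functions import udf as sketch_udf

sketch_spark = SparkSession.builder.master("local[2]").appName("udf-sketch").getOrCreate()

def sketch_get_data(uuid, submitter_id):
    # Stand-in for get_data(): returns a deterministic string instead of
    # downloading the file and uploading it to S3.
    return f"s3://example-bucket/tcga-raw-objects-by-uuid/{uuid}/{submitter_id}.tsv"

sketch_rows = sketch_spark.createDataFrame(
    [("uuid-0001", "TCGA-00-0001"), ("uuid-0002", "TCGA-00-0002")],
    ["id", "submitter_id"],
)

# udf() defaults to a StringType return column, which matches the S3 URI here.
sketch_path_udf = sketch_udf(sketch_get_data)
sketch_with_paths = sketch_rows.withColumn("Result", sketch_path_udf("id", "submitter_id"))
sketch_with_paths.show(truncate=False)   # one fabricated S3 path per input row
# --- end aside ---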
226 | urldf = udf(get_data) 227 | inputpath = uf.withColumn('Result', urldf('id', 'submitter_id')) 228 | 229 | # Convert the data frame back into a native Python list 230 | inputlist = list(inputpath.select('Result').toPandas()['Result']) 231 | 232 | 233 | ### Step 3: Read the CSV files in the input list and perform any 234 | ### required per-data-type translation. 235 | 236 | if data_type == 'Clinical Supplement': 237 | 238 | ## Clinical data is treated differently since we will process each 239 | ## input file individually to create one table for each type of 240 | ## clinical data available. 241 | for filename in inputlist: 242 | dfname = os.path.splitext( 243 | os.path.basename(filename) 244 | )[0].replace("nationwidechildrens.org_","") 245 | trim = max(i for i in range(len(project_id)) 246 | if dfname.lower().endswith(project_id[-i:].lower())) 247 | dfname = dfname[:-trim].rstrip('_') 248 | 249 | # Read the file in CSV format into a dataframe 250 | output_df = spark.read.option("sep","\t").csv(filename,header=True) 251 | 252 | # Remove rows that have obviously superfluous data 253 | any_column = output_df.columns[0] 254 | output_df = output_df.filter(~ ( 255 | (output_df[any_column] == any_column) | 256 | (output_df[any_column].startswith('CDE_ID:')) 257 | )) 258 | 259 | table_part = f'{dfname}/{project_id}' 260 | if dfname == 'clinical_nte': 261 | # workaround for one table 262 | table_part = f'{dfname}_{project_id}' 263 | 264 | dest = f's3://{output_bucket}/tcga-clinical/{table_part}' 265 | output_df.write.mode("overwrite").parquet(dest) 266 | 267 | else: 268 | ## Read the inputlist into a single dataframe 269 | 270 | # Certain data types require specific read options or pre-processing 271 | if data_type == 'Gene Expression Quantification': 272 | data = spark.read.option("sep", "\t").csv( 273 | inputlist, header=True, mode="DROPMALFORMED", comment="#", 274 | inferSchema=True 275 | ) 276 | data = data.filter(~ (data.gene_id.startswith('N_'))) 277 | 278 | elif data_type == 'Masked Somatic Mutation': 279 | data = spark.read.format("csv")\ 280 | .option("header", "true")\ 281 | .option("inferSchema", "true")\ 282 | .option("delimiter", "\t")\ 283 | .option("comment", "#")\ 284 | .load(inputlist) 285 | 286 | # Reformat submitter_id column 287 | data = data.withColumn('new_submitter_id', 288 | data.Tumor_Sample_Barcode.substr(1, 12)) 289 | 290 | else: 291 | data = spark.read.option("sep", "\t").csv( 292 | inputlist, header=True, mode="DROPMALFORMED" 293 | ) 294 | 295 | ## Add a column with file path, which adds the s3 file path from which 296 | ## row is extracted 297 | data = data.withColumn("fullpath", input_file_name()) 298 | 299 | ## Add a column which is a substring of full s3 path and gives 300 | ## filename so that we can match and join with Json data 301 | data = data.withColumn("file", data.fullpath.substr(55,100)) 302 | if data_type in ('Gene Level Copy Number', 303 | 'Gene Expression Quantification'): 304 | data = data.withColumn("EnsemblGene", data.gene_id.substr(1,15)) 305 | 306 | ## Join the data with the frame that includes submitter ID 307 | output_df = data.join(inputpath,data["fullpath"]==inputpath["Result"]) 308 | 309 | ## Perform per-data-type post-processing and define the destination path 310 | 311 | if data_type == 'Gene Expression Quantification': 312 | ## Select only relevant columns 313 | expression_col = 'tpm_unstranded' 314 | allgene = output_df.select( 315 | "EnsemblGene", 316 | output_df["submitter_id"].getItem(0), 317 | expression_col 318 | 
).withColumnRenamed("submitter_id[0]", "submitter_id") 319 | 320 | ## Pivot the dataframe to form the expression matrix 321 | output_df = allgene.groupBy('EnsemblGene') \ 322 | .pivot('submitter_id') \ 323 | .agg(first(expression_col)) 324 | dest = f's3://{output_bucket}/tcga-expression/expression_{project_id}/{exp_wftype}' 325 | 326 | elif data_type == 'Gene Level Copy Number': 327 | dest = f's3://{output_bucket}/tcga-cnv/{project_id}' 328 | 329 | elif data_type == 'Masked Somatic Mutation': 330 | output_df = output_df.drop('submitter_id')\ 331 | .withColumnRenamed('new_submitter_id', 332 | 'submitter_id') 333 | 334 | dest = f's3://{output_bucket}/tcga-mutation/{project_id}' 335 | 336 | ## Write the data frame to the output bucket 337 | output_df.write.mode("overwrite").parquet(dest) 338 | 339 | 340 | job.commit() 341 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/resources/scripts/transfer_tcia_images_glue.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import hashlib 4 | import requests 5 | from zipfile import ZipFile 6 | 7 | from awsglue.transforms import * 8 | from awsglue.utils import getResolvedOptions 9 | from pyspark.context import SparkContext 10 | from awsglue.context import GlueContext 11 | from awsglue.job import Job 12 | 13 | import boto3 14 | 15 | API = "https://services.cancerimagingarchive.net/services/v4/TCIA/query/" 16 | 17 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'database', 'output_bucket']) 18 | 19 | DATABASE = args['database'] 20 | BUCKET_NAME = args['output_bucket'] 21 | 22 | sc = SparkContext() 23 | glueContext = GlueContext(sc) 24 | spark = glueContext.spark_session 25 | job = Job(glueContext) 26 | job.init(args['JOB_NAME'], args) 27 | 28 | image_series = glueContext.create_dynamic_frame.from_catalog( 29 | database=DATABASE, 30 | table_name="tcia_image_series", 31 | transformation_ctx="datasource0" 32 | ) 33 | 34 | 35 | def get_image_stream_unzip(row): 36 | """Get Zip file from API, unpack in memory and write to S3 bucket""" 37 | S3_CLIENT = boto3.resource("s3") 38 | url = API + "getImage" 39 | seriesuid = row.SeriesInstanceUID 40 | modality = row.Modality 41 | parameters = {"SeriesInstanceUID": seriesuid} 42 | with requests.get(url, params=parameters, stream=True) as req: 43 | req.raise_for_status() 44 | with io.BytesIO() as byte_stream: 45 | for chunk in req.iter_content(chunk_size=10000): 46 | byte_stream.write(chunk) 47 | with ZipFile(byte_stream, "r") as zo: 48 | list_of_files = [i for i in zo.namelist() if i.endswith("dcm")] 49 | for dcm in list_of_files: 50 | item = zo.read(dcm) 51 | bucket_key = f"{seriesuid}/{modality}/{dcm.replace('./', '')}" 52 | S3_CLIENT.Bucket(BUCKET_NAME).put_object(Key=bucket_key, Body=item) 53 | print( 54 | f"[INFO] Writing DICOM object: {bucket_key} to bucket: {BUCKET_NAME}\n\n" 55 | ) 56 | 57 | image_series.toDF().foreach(get_image_stream_unzip) 58 | 59 | job.commit() 60 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/run_crawlers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export AWS_DEFAULT_OUTPUT=text 4 | 5 | project_name=$1 6 | 7 | # start imaging crawlers 8 | imaging_crawlers=$( 9 | aws cloudformation describe-stack-resources \ 10 | --stack-name "${project_name}-Imaging" \ 11 | --query 
'StackResources[?ResourceType==`AWS::Glue::Crawler`].PhysicalResourceId') 12 | 13 | for img_crawler_name in ${imaging_crawlers}; do 14 | aws glue start-crawler --name ${img_crawler_name} 15 | done 16 | printf "Crawlers started successfully\n" 17 | 18 | # start glue jobs (not needed) 19 | #workflow_name=$(aws cloudformation describe-stacks \ 20 | # --stack-name "${project_name}-Imaging" \ 21 | # --query 'Stacks[0].Outputs[?OutputKey==`TCGAWorkflow`].OutputValue') 22 | # 23 | #aws glue start-workflow-run --name ${workflow_name} 24 | -------------------------------------------------------------------------------- /source/GenomicsAnalysisCode/setup/lambda.py: -------------------------------------------------------------------------------- 1 | # /********************************************************************************************************************* 2 | # * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. * 3 | # * * 4 | # * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * 5 | # * with the License. A copy of the License is located at * 6 | # * * 7 | # * http://www.apache.org/licenses/LICENSE-2.0 * 8 | # * * 9 | # * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES * 10 | # * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * 11 | # * and limitations under the License. * 12 | # *********************************************************************************************************************/ 13 | 14 | from __future__ import print_function 15 | from crhelper import CfnResource 16 | import logging 17 | import boto3 18 | 19 | logger = logging.getLogger(__name__) 20 | # Initialise the helper, all inputs are optional, this example shows the defaults 21 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL') 22 | 23 | try: 24 | codebuild = boto3.client('codebuild') 25 | # pass 26 | except Exception as e: 27 | helper.init_failure(e) 28 | 29 | 30 | @helper.create 31 | def create(event, context): 32 | logger.info("Got Create") 33 | start_build_job(event, context) 34 | 35 | 36 | @helper.update 37 | def update(event, context): 38 | logger.info("Got Update") 39 | start_build_job(event, context) 40 | 41 | 42 | @helper.delete 43 | def delete(event, context): 44 | logger.info("Got Delete") 45 | start_build_job(event, context, action='teardown') 46 | # Delete never returns anything. Should not fail if the underlying resources are already deleted. Desired state. 
47 | 48 | 49 | @helper.poll_create 50 | def poll_create(event, context): 51 | logger.info("Got Create poll") 52 | return check_build_job_status(event, context) 53 | 54 | 55 | @helper.poll_update 56 | def poll_update(event, context): 57 | logger.info("Got Update poll") 58 | return check_build_job_status(event, context) 59 | 60 | 61 | @helper.poll_delete 62 | def poll_delete(event, context): 63 | logger.info("Got Delete poll") 64 | return check_build_job_status(event, context) 65 | 66 | 67 | def handler(event, context): 68 | helper(event, context) 69 | 70 | 71 | def start_build_job(event, context, action='setup'): 72 | response = codebuild.start_build( 73 | projectName=event['ResourceProperties']['CodeBuildProjectName'], 74 | environmentVariablesOverride=[{ 75 | 'name': 'SOLUTION_ACTION', 76 | 'value': action, 77 | 'type': 'PLAINTEXT' 78 | }] 79 | ) 80 | logger.info(response) 81 | 82 | helper.Data.update({"JobID": response['build']['id']}) 83 | 84 | 85 | def check_build_job_status(event, context): 86 | code_build_project_name = event['ResourceProperties']['CodeBuildProjectName'] 87 | 88 | if not helper.Data.get("JobID"): 89 | raise ValueError("Job ID missing in the polling event.") 90 | 91 | job_id = helper.Data.get("JobID") 92 | 93 | # 'SUCCEEDED' | 'FAILED' | 'FAULT' | 'TIMED_OUT' | 'IN_PROGRESS' | 'STOPPED' 94 | response = codebuild.batch_get_builds(ids=[job_id]) 95 | build_status = response['builds'][0]['buildStatus'] 96 | 97 | if build_status == 'IN_PROGRESS': 98 | logger.info(build_status) 99 | return None 100 | else: 101 | if build_status == 'SUCCEEDED': 102 | logger.info(build_status) 103 | return True 104 | else: 105 | msg = "Code Build job '{0}' in project '{1}' exited with a build status of '{2}'. Please check the code build job output log for more information." 
\
106 |                 .format(job_id, code_build_project_name, build_status)
107 |             logger.info(msg)
108 |             raise ValueError(msg)
109 | 
--------------------------------------------------------------------------------
/source/GenomicsAnalysisCode/setup/requirements.txt:
--------------------------------------------------------------------------------
1 | crhelper
2 | 
--------------------------------------------------------------------------------
/source/GenomicsAnalysisZone/zone_cfn.yml:
--------------------------------------------------------------------------------
1 | ---
2 | AWSTemplateFormatVersion: 2010-09-09
3 | Description: GenomicsAnalysisZone
4 | 
5 | # CodeCommit
6 | # Repo
7 | 
8 | Parameters:
9 |   ResourcePrefix:
10 |     Type: String
11 |     Default: GenomicsAnalysis
12 |   ResourcePrefixLowercase:
13 |     Type: String
14 |     Default: genomicsanalysis
15 | 
16 | Resources:
17 |   # CodeCommit
18 |   Repo:
19 |     DeletionPolicy: Retain
20 |     Type: AWS::CodeCommit::Repository
21 |     Properties:
22 |       RepositoryName: !Sub ${ResourcePrefix}-Pipe
23 |       RepositoryDescription: !Sub ${ResourcePrefix}-Pipe
24 | Outputs:
25 |   RepoName:
26 |     Description: RepoName
27 |     Value: !Sub ${Repo.Name}
28 |   RepoHttpUrl:
29 |     Description: RepoHttpUrl
30 |     Value: !Sub ${Repo.CloneUrlHttp}
31 | 
32 | # aws cloudformation update-stack --stack-name GenomicsAnalysisZone --template-body file://zone_cfn.yml --capabilities CAPABILITY_IAM --output text; aws cloudformation wait stack-update-complete --stack-name GenomicsAnalysisZone
33 | 
34 | # aws cloudformation create-stack --stack-name GenomicsAnalysisZone --template-body file://zone_cfn.yml --capabilities CAPABILITY_IAM --enable-termination-protection --output text; aws cloudformation wait stack-create-complete --stack-name GenomicsAnalysisZone; aws cloudformation describe-stacks --stack-name GenomicsAnalysisZone --query 'Stacks[].Outputs[?OutputKey==`RepoHttpUrl`].OutputValue' --output text
35 | 
36 | 
--------------------------------------------------------------------------------
/source/TCIA_etl.yaml:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | #
4 | # Permission is hereby granted, free of charge, to any person
5 | # obtaining a copy of this software and associated documentation files
6 | # (the "Software"), to deal in the Software without restriction,
7 | # including without limitation the rights to use, copy, modify, merge,
8 | # publish, distribute, sublicense, and/or sell copies of the Software,
9 | # and to permit persons to whom the Software is furnished to do so.
10 | #
11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
13 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
15 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
16 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
17 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 | # SOFTWARE.
19 | 20 | AWSTemplateFormatVersion: '2010-09-09' 21 | Description: | 22 | This CloudFormation Template deploys Glue jobs and crawlers for TCGA 23 | data 24 | 25 | Parameters: 26 | ResourcesBucket: 27 | Type: String 28 | 29 | ResourcePrefix: 30 | Type: String 31 | 32 | ResourcePrefixLowercase: 33 | Type: String 34 | 35 | DatabaseName: 36 | Type: String 37 | Default: AUTO 38 | Description: | 39 | If not AUTO, references an existing Glue database for crawlers 40 | to create tables in. 41 | 42 | DataLakeBucket: 43 | Description: | 44 | S3 bucket where results will be written. Bucketname needs to be 45 | unique. The bucket name must respect the S3 bucket naming 46 | conventions (can contain lowercase letters, numbers, periods and 47 | hyphens). 48 | Type: String 49 | AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" 50 | 51 | ExistingBucket: 52 | Description: Is this an existing bucket? 53 | Type: String 54 | AllowedValues: 55 | - "Yes" 56 | - "No" 57 | Default: "No" 58 | 59 | Conditions: 60 | BucketDoesNotExist: 61 | Fn::Equals: 62 | - !Ref ExistingBucket 63 | - "No" 64 | 65 | NeedsGlueDatabase: !Equals [!Ref DatabaseName, "AUTO"] 66 | 67 | Resources: 68 | 69 | TCGAS3Bucket: 70 | Type: AWS::S3::Bucket 71 | Condition: BucketDoesNotExist 72 | DeletionPolicy: Retain 73 | UpdateReplacePolicy: Retain 74 | Properties: 75 | BucketName: !Ref DataLakeBucket 76 | BucketEncryption: 77 | ServerSideEncryptionConfiguration: 78 | - ServerSideEncryptionByDefault: 79 | SSEAlgorithm: AES256 80 | 81 | GlueJobRole: 82 | Type: AWS::IAM::Role 83 | Properties: 84 | AssumeRolePolicyDocument: 85 | Version: "2012-10-17" 86 | Statement: 87 | - Effect: "Allow" 88 | Principal: 89 | Service: "glue.amazonaws.com" 90 | Action: "sts:AssumeRole" 91 | Path: "/" 92 | ManagedPolicyArns: 93 | - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole 94 | Policies: 95 | - PolicyName: athena_access 96 | PolicyDocument: 97 | Version: 2012-10-17 98 | Statement: 99 | - Effect: Allow 100 | Action: 101 | - athena:StartQueryExecution 102 | - athena:GetQueryExecution 103 | - athena:GetQueryResults 104 | Resource: 105 | - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/primary 106 | - PolicyName: kms_access 107 | PolicyDocument: 108 | Version: 2012-10-17 109 | Statement: 110 | - Effect: Allow 111 | Action: 112 | - kms:GenerateDataKey 113 | - kms:Decrypt 114 | - kms:Encrypt 115 | Resource: 116 | - !ImportValue 117 | Fn::Sub: '${ResourcePrefix}-DataCatalogEncryptionKeyArn' 118 | - PolicyName: "CrawlerAccess" 119 | PolicyDocument: 120 | Version: "2012-10-17" 121 | Statement: 122 | - Effect: "Allow" 123 | Action: 124 | - s3:PutObject 125 | - s3:GetObject 126 | - s3:ListBucket 127 | - s3:DeleteObject 128 | Resource: 129 | - !Sub 'arn:aws:s3:::${DataLakeBucket}' 130 | - !Sub 'arn:aws:s3:::${DataLakeBucket}/*' 131 | - Effect: "Allow" 132 | Action: 133 | - s3:GetObject 134 | - s3:ListBucket 135 | Resource: 136 | - !Sub 'arn:aws:s3:::${ResourcesBucket}' 137 | - !Sub 'arn:aws:s3:::${ResourcesBucket}/*' 138 | 139 | TcgaLuadExpressionGlueJob: 140 | Type: AWS::Glue::Job 141 | Properties: 142 | Command: 143 | Name: glueetl 144 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 145 | DefaultArguments: 146 | "--output_bucket": !Ref 'DataLakeBucket' 147 | "--project": "TCGA-LUAD" 148 | "--data_type": "Gene Expression Quantification" 149 | GlueVersion: "2.0" 150 | ExecutionProperty: 151 | MaxConcurrentRuns: 2 152 | 
MaxRetries: 0 153 | Role: !Ref GlueJobRole 154 | 155 | TcgaLuadMutationGlueJob: 156 | Type: AWS::Glue::Job 157 | Properties: 158 | Command: 159 | Name: glueetl 160 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 161 | DefaultArguments: 162 | "--output_bucket": !Ref 'DataLakeBucket' 163 | "--project": "TCGA-LUAD" 164 | "--data_type": "Masked Somatic Mutation" 165 | GlueVersion: "2.0" 166 | ExecutionProperty: 167 | MaxConcurrentRuns: 2 168 | MaxRetries: 0 169 | Role: !Ref GlueJobRole 170 | 171 | TcgaLuadCnvGlueJob: 172 | Type: AWS::Glue::Job 173 | Properties: 174 | Command: 175 | Name: glueetl 176 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 177 | DefaultArguments: 178 | "--output_bucket": !Ref 'DataLakeBucket' 179 | "--project": "TCGA-LUAD" 180 | "--data_type": "Gene Level Copy Number" 181 | GlueVersion: "2.0" 182 | ExecutionProperty: 183 | MaxConcurrentRuns: 2 184 | MaxRetries: 0 185 | Role: !Ref GlueJobRole 186 | 187 | TcgaLuadClinicalGlueJob: 188 | Type: AWS::Glue::Job 189 | Properties: 190 | Command: 191 | Name: glueetl 192 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 193 | DefaultArguments: 194 | "--output_bucket": !Ref 'DataLakeBucket' 195 | "--project": "TCGA-LUAD" 196 | "--data_type": "Clinical Supplement" 197 | GlueVersion: "2.0" 198 | ExecutionProperty: 199 | MaxConcurrentRuns: 2 200 | MaxRetries: 0 201 | Role: !Ref GlueJobRole 202 | 203 | TcgaLuadImagingMetadataGlueJob: 204 | Type: AWS::Glue::Job 205 | Properties: 206 | Command: 207 | Name: glueetl 208 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/image_api_glue.py" 209 | DefaultArguments: 210 | "--output_bucket": !Ref 'DataLakeBucket' 211 | "--project": "TCGA-LUAD" 212 | GlueVersion: "2.0" 213 | ExecutionProperty: 214 | MaxConcurrentRuns: 2 215 | MaxRetries: 0 216 | Role: !Ref GlueJobRole 217 | 218 | TcgaLuscExpressionGlueJob: 219 | Type: AWS::Glue::Job 220 | Properties: 221 | Command: 222 | Name: glueetl 223 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 224 | DefaultArguments: 225 | "--output_bucket": !Ref 'DataLakeBucket' 226 | "--project": "TCGA-LUSC" 227 | "--data_type": "Gene Expression Quantification" 228 | GlueVersion: "2.0" 229 | ExecutionProperty: 230 | MaxConcurrentRuns: 2 231 | MaxRetries: 0 232 | Role: !Ref GlueJobRole 233 | 234 | TcgaLuscMutationGlueJob: 235 | Type: AWS::Glue::Job 236 | Properties: 237 | Command: 238 | Name: glueetl 239 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 240 | DefaultArguments: 241 | "--output_bucket": !Ref 'DataLakeBucket' 242 | "--project": "TCGA-LUSC" 243 | "--data_type": "Masked Somatic Mutation" 244 | GlueVersion: "2.0" 245 | ExecutionProperty: 246 | MaxConcurrentRuns: 2 247 | MaxRetries: 0 248 | Role: !Ref GlueJobRole 249 | 250 | TcgaLuscCnvGlueJob: 251 | Type: AWS::Glue::Job 252 | Properties: 253 | Command: 254 | Name: glueetl 255 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 256 | DefaultArguments: 257 | "--output_bucket": !Ref 'DataLakeBucket' 258 | "--project": "TCGA-LUSC" 259 | "--data_type": "Gene Level Copy Number" 260 | GlueVersion: "2.0" 261 | ExecutionProperty: 262 | MaxConcurrentRuns: 2 263 | MaxRetries: 0 264 | Role: !Ref GlueJobRole 265 | 266 | TcgaLuscClinicalGlueJob: 267 | Type: AWS::Glue::Job 268 | Properties: 269 | Command: 270 | Name: glueetl 271 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py" 272 | DefaultArguments: 273 | 
"--output_bucket": !Ref 'DataLakeBucket' 274 | "--project": "TCGA-LUSC" 275 | "--data_type": "Clinical Supplement" 276 | GlueVersion: "2.0" 277 | ExecutionProperty: 278 | MaxConcurrentRuns: 2 279 | MaxRetries: 0 280 | Role: !Ref GlueJobRole 281 | 282 | TcgaLuscImagingMetadataGlueJob: 283 | Type: AWS::Glue::Job 284 | Properties: 285 | Command: 286 | Name: glueetl 287 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/image_api_glue.py" 288 | DefaultArguments: 289 | "--output_bucket": !Ref 'DataLakeBucket' 290 | "--project": "TCGA-LUSC" 291 | GlueVersion: "2.0" 292 | ExecutionProperty: 293 | MaxConcurrentRuns: 2 294 | MaxRetries: 0 295 | Role: !Ref GlueJobRole 296 | 297 | TcgaSummaryGlueJob: 298 | Type: AWS::Glue::Job 299 | Properties: 300 | Command: 301 | Name: glueetl 302 | ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/create_tcga_summary.py" 303 | DefaultArguments: 304 | "--bucket": !Ref 'DataLakeBucket' 305 | "--workgroup": "primary" # does not work when set to the solution wg 306 | "--database": !If 307 | - NeedsGlueDatabase 308 | - !Ref TcgaDb 309 | - !Ref DatabaseName 310 | GlueVersion: "2.0" 311 | ExecutionProperty: 312 | MaxConcurrentRuns: 2 313 | MaxRetries: 0 314 | Role: !Ref GlueJobRole 315 | 316 | TCGAMutationCrawler: 317 | Type: AWS::Glue::Crawler 318 | Properties: 319 | Name: !Sub ${ResourcePrefixLowercase}-tcga-mut 320 | Role: !Ref GlueJobRole 321 | Description: AWS Glue crawler to crawl TCGA mutation data 322 | DatabaseName: !If 323 | - NeedsGlueDatabase 324 | - !Ref TcgaDb 325 | - !Ref DatabaseName 326 | Targets: 327 | S3Targets: 328 | - Path: !Join ['',['s3://',!Ref 'DataLakeBucket','/tcga-mutation']] 329 | SchemaChangePolicy: 330 | UpdateBehavior: "UPDATE_IN_DATABASE" 331 | DeleteBehavior: "LOG" 332 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 333 | 334 | TCGACNVCrawler: 335 | Type: AWS::Glue::Crawler 336 | Properties: 337 | Name: !Sub ${ResourcePrefixLowercase}-tcga-cnv 338 | Role: !Ref GlueJobRole 339 | Description: AWS Glue crawler to crawl TCGA copy number data 340 | DatabaseName: !If 341 | - NeedsGlueDatabase 342 | - !Ref TcgaDb 343 | - !Ref DatabaseName 344 | Targets: 345 | S3Targets: 346 | - Path: !Join ['',['s3://',!Ref 'DataLakeBucket','/tcga-cnv']] 347 | SchemaChangePolicy: 348 | UpdateBehavior: "UPDATE_IN_DATABASE" 349 | DeleteBehavior: "LOG" 350 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 351 | 352 | TCGAExpressionCrawler: 353 | Type: AWS::Glue::Crawler 354 | Properties: 355 | Name: !Sub ${ResourcePrefixLowercase}-tcga-exp 356 | Role: !Ref GlueJobRole 357 | Description: AWS Glue crawler to crawl TCGA expression data 358 | DatabaseName: !If 359 | - NeedsGlueDatabase 360 | - !Ref TcgaDb 361 | - !Ref DatabaseName 362 | Targets: 363 | S3Targets: 364 | - Path: !Join ['',['s3://',!Ref 'DataLakeBucket','/tcga-expression']] 365 | SchemaChangePolicy: 366 | UpdateBehavior: "UPDATE_IN_DATABASE" 367 | DeleteBehavior: "LOG" 368 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 369 | 370 | TCGAClinicalCrawler: 371 | Type: AWS::Glue::Crawler 372 | Properties: 373 | Name: !Sub ${ResourcePrefixLowercase}-tcga-clin 374 | Role: !Ref GlueJobRole 375 | Description: AWS Glue crawler to 
crawl TCGA clinical data 376 | DatabaseName: !If 377 | - NeedsGlueDatabase 378 | - !Ref TcgaDb 379 | - !Ref DatabaseName 380 | Targets: 381 | S3Targets: 382 | - Path: !Join ['',['s3://',!Ref 'DataLakeBucket','/tcga-clinical']] 383 | SchemaChangePolicy: 384 | UpdateBehavior: "UPDATE_IN_DATABASE" 385 | DeleteBehavior: "LOG" 386 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 387 | 388 | TCGAImagingMetadataCrawler: 389 | Type: AWS::Glue::Crawler 390 | Properties: 391 | Name: !Sub ${ResourcePrefixLowercase}-tcga-img 392 | Role: !Ref GlueJobRole 393 | Description: AWS Glue crawler to crawl TCGA imaging metadata 394 | DatabaseName: !If 395 | - NeedsGlueDatabase 396 | - !Ref TcgaDb 397 | - !Ref DatabaseName 398 | Targets: 399 | S3Targets: 400 | - Path: !Join ['',['s3://',!Ref 'DataLakeBucket','/tcia-metadata']] 401 | SchemaChangePolicy: 402 | UpdateBehavior: "UPDATE_IN_DATABASE" 403 | DeleteBehavior: "LOG" 404 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 405 | 406 | TCGASummaryCrawler: 407 | Type: AWS::Glue::Crawler 408 | Properties: 409 | Name: !Sub ${ResourcePrefixLowercase}-tcga-sum 410 | Role: !Ref GlueJobRole 411 | Description: AWS Glue crawler to crawl TCGA summary data 412 | DatabaseName: !If 413 | - NeedsGlueDatabase 414 | - !Ref TcgaDb 415 | - !Ref DatabaseName 416 | Targets: 417 | S3Targets: 418 | - Path: !Join ['', ['s3://', !Ref DataLakeBucket, '/tcga-summary']] 419 | SchemaChangePolicy: 420 | UpdateBehavior: "UPDATE_IN_DATABASE" 421 | DeleteBehavior: "LOG" 422 | Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}" 423 | 424 | TCGAWorkflow: 425 | Type: AWS::Glue::Workflow 426 | Properties: 427 | Description: "Workflow that kicks off exp job and crawler" 428 | 429 | WorkflowStartTrigger: 430 | Type: AWS::Glue::Trigger 431 | Properties: 432 | Name: !Sub ${ResourcePrefixLowercase}-tcga-start 433 | Type: ON_DEMAND 434 | Description: Trigger for starting the workflow 435 | Actions: 436 | - JobName: !Ref TcgaLuadExpressionGlueJob 437 | - JobName: !Ref TcgaLuadMutationGlueJob 438 | - JobName: !Ref TcgaLuadCnvGlueJob 439 | - JobName: !Ref TcgaLuadClinicalGlueJob 440 | - JobName: !Ref TcgaLuadImagingMetadataGlueJob 441 | - JobName: !Ref TcgaLuscExpressionGlueJob 442 | - JobName: !Ref TcgaLuscMutationGlueJob 443 | - JobName: !Ref TcgaLuscCnvGlueJob 444 | - JobName: !Ref TcgaLuscClinicalGlueJob 445 | - JobName: !Ref TcgaLuscImagingMetadataGlueJob 446 | WorkflowName: !Ref TCGAWorkflow 447 | 448 | ExpCrawlerTrigger: 449 | Type: AWS::Glue::Trigger 450 | Properties: 451 | Name: !Sub ${ResourcePrefixLowercase}-tcga-exp 452 | Type: "CONDITIONAL" 453 | Description: "Description for a conditional job trigger" 454 | Actions: 455 | - CrawlerName: !Ref 'TCGAExpressionCrawler' 456 | StartOnCreation: true 457 | Predicate: 458 | Logical: AND 459 | Conditions: 460 | - LogicalOperator: EQUALS 461 | JobName: !Ref 'TcgaLuadExpressionGlueJob' 462 | State: SUCCEEDED 463 | - LogicalOperator: EQUALS 464 | JobName: !Ref 'TcgaLuscExpressionGlueJob' 465 | State: SUCCEEDED 466 | WorkflowName: !Ref TCGAWorkflow 467 | 468 | MutCrawlerTrigger: 469 | Type: AWS::Glue::Trigger 470 | Properties: 471 | Name: !Sub ${ResourcePrefixLowercase}-tcga-mut 472 | 
Type: "CONDITIONAL" 473 | Description: "Description for a conditional Mutation crawler job trigger" 474 | Actions: 475 | - CrawlerName: !Ref 'TCGAMutationCrawler' 476 | StartOnCreation: true 477 | Predicate: 478 | Logical: AND 479 | Conditions: 480 | - LogicalOperator: EQUALS 481 | JobName: !Ref 'TcgaLuadMutationGlueJob' 482 | State: SUCCEEDED 483 | - LogicalOperator: EQUALS 484 | JobName: !Ref 'TcgaLuscMutationGlueJob' 485 | State: SUCCEEDED 486 | WorkflowName: !Ref TCGAWorkflow 487 | 488 | CnvCrawlerTrigger: 489 | Type: AWS::Glue::Trigger 490 | Properties: 491 | Name: !Sub ${ResourcePrefixLowercase}-tcga-cnv 492 | Type: "CONDITIONAL" 493 | Description: "Description for a conditional CNV crawler job trigger" 494 | Actions: 495 | - CrawlerName: !Ref 'TCGACNVCrawler' 496 | StartOnCreation: true 497 | Predicate: 498 | Logical: AND 499 | Conditions: 500 | - LogicalOperator: EQUALS 501 | JobName: !Ref 'TcgaLuadCnvGlueJob' 502 | State: SUCCEEDED 503 | - LogicalOperator: EQUALS 504 | JobName: !Ref 'TcgaLuscCnvGlueJob' 505 | State: SUCCEEDED 506 | WorkflowName: !Ref TCGAWorkflow 507 | 508 | ClinCrawlerTrigger: 509 | Type: AWS::Glue::Trigger 510 | Properties: 511 | Name: !Sub ${ResourcePrefixLowercase}-tcga-clin 512 | Type: "CONDITIONAL" 513 | Description: "Description for a conditional Clinical crawler job trigger" 514 | Actions: 515 | - CrawlerName: !Ref 'TCGAClinicalCrawler' 516 | StartOnCreation: true 517 | Predicate: 518 | Logical: AND 519 | Conditions: 520 | - LogicalOperator: EQUALS 521 | JobName: !Ref 'TcgaLuadClinicalGlueJob' 522 | State: SUCCEEDED 523 | - LogicalOperator: EQUALS 524 | JobName: !Ref 'TcgaLuscClinicalGlueJob' 525 | State: SUCCEEDED 526 | WorkflowName: !Ref TCGAWorkflow 527 | 528 | ImMetaCrawlerTrigger: 529 | Type: AWS::Glue::Trigger 530 | Properties: 531 | Name: !Sub ${ResourcePrefixLowercase}-tcga-img 532 | Type: "CONDITIONAL" 533 | Description: "Description for a conditional Clinical crawler job trigger" 534 | Actions: 535 | - CrawlerName: !Ref 'TCGAImagingMetadataCrawler' 536 | StartOnCreation: true 537 | Predicate: 538 | Logical: AND 539 | Conditions: 540 | - LogicalOperator: EQUALS 541 | JobName: !Ref 'TcgaLuadImagingMetadataGlueJob' 542 | State: SUCCEEDED 543 | - LogicalOperator: EQUALS 544 | JobName: !Ref 'TcgaLuscImagingMetadataGlueJob' 545 | State: SUCCEEDED 546 | WorkflowName: !Ref TCGAWorkflow 547 | 548 | RunSummaryJobTrigger: 549 | Type: AWS::Glue::Trigger 550 | Properties: 551 | Name: !Sub ${ResourcePrefixLowercase}-tcga-sum 552 | Type: "CONDITIONAL" 553 | Description: "Build TCGA summary" 554 | Actions: 555 | - JobName: !Ref TcgaSummaryGlueJob 556 | StartOnCreation: true 557 | Predicate: 558 | Logical: AND 559 | Conditions: 560 | - LogicalOperator: EQUALS 561 | CrawlerName: !Ref TCGAMutationCrawler 562 | CrawlState: SUCCEEDED 563 | - LogicalOperator: EQUALS 564 | CrawlerName: !Ref TCGACNVCrawler 565 | CrawlState: SUCCEEDED 566 | - LogicalOperator: EQUALS 567 | CrawlerName: !Ref TCGAExpressionCrawler 568 | CrawlState: SUCCEEDED 569 | - LogicalOperator: EQUALS 570 | CrawlerName: !Ref TCGAClinicalCrawler 571 | CrawlState: SUCCEEDED 572 | - LogicalOperator: EQUALS 573 | CrawlerName: !Ref TCGAImagingMetadataCrawler 574 | CrawlState: SUCCEEDED 575 | WorkflowName: !Ref TCGAWorkflow 576 | 577 | TcgaDb: 578 | Type: AWS::Glue::Database 579 | Condition: NeedsGlueDatabase 580 | Properties: 581 | CatalogId: !Ref AWS::AccountId 582 | DatabaseInput: 583 | Description: "AWS Glue container to hold tables for the TCGA crawlers" 584 | 585 | 586 | Outputs: 587 | DataLakeBucket: 
588 | Value: !Ref DataLakeBucket 589 | 590 | TCGAWorkflow: 591 | Value: !Ref TCGAWorkflow 592 | 593 | CreateQuicksightLink: 594 | Value: !Sub "https://${AWS::Region}.console.aws.amazon.com/cloudformation/home\ 595 | ?region=${AWS::Region}#/stacks/create/review\ 596 | ?templateURL=https://s3.${AWS::Region}.amazonaws.com/${ResourcesBucket}/quicksight_cfn.yml\ 597 | &stackName=${ResourcePrefix}-Quicksight\ 598 | &param_Project=${ResourcePrefix}" 599 | -------------------------------------------------------------------------------- /source/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export AWS_DEFAULT_OUTPUT=text 4 | 5 | create_stack() { 6 | local stack_name=${1} 7 | local template_name=${2} 8 | local ResourcePrefix=${3} 9 | 10 | local ResourcePrefix_lowercase=$(echo ${ResourcePrefix} | tr '[:upper:]' '[:lower:]') 11 | 12 | aws cloudformation create-stack --stack-name ${stack_name} --template-body file://${template_name} --parameters ParameterKey=ResourcePrefix,ParameterValue=${ResourcePrefix} ParameterKey=ResourcePrefixLowercase,ParameterValue=${ResourcePrefix_lowercase} --capabilities CAPABILITY_NAMED_IAM --no-enable-termination-protection; aws cloudformation wait stack-create-complete --stack-name ${stack_name} 13 | } 14 | 15 | clone_and_commit() { 16 | local stack_name=${1} 17 | 18 | local repo_http_url=$(aws cloudformation describe-stacks --stack-name ${stack_name} --query 'Stacks[].Outputs[?OutputKey==`RepoHttpUrl`].OutputValue') 19 | 20 | git init .; git remote add origin ${repo_http_url} 21 | 22 | git add *; git commit -m "first commit"; git push --set-upstream origin master 23 | 24 | } 25 | 26 | wait_for_pipeline() { 27 | local pipeline_name=${1} 28 | local commit_id=${2} 29 | 30 | local message="Max attempts reached. Pipeline execution failed for commit: ${commit_id}" 31 | for i in {1..60}; do 32 | 33 | stage_status=$(aws codepipeline list-pipeline-executions --pipeline-name ${pipeline_name} --query 'pipelineExecutionSummaries[?sourceRevisions[0].revisionId==`'${commit_id}'`].status') 34 | 35 | if [ "${stage_status}" == "InProgress" ] || [ -z "${stage_status}" ]; then 36 | printf '.'
37 | sleep 30 38 | elif [ "${stage_status}" == "Succeeded" ]; then 39 | message="CodePipeline execution succeeded for commit: ${commit_id}" 40 | break 41 | elif [ "${stage_status}" == "Failed" ]; then 42 | message="CodePipeline execution Failed for commit: ${commit_id}" 43 | break 44 | fi 45 | 46 | done 47 | printf "\n${message}\n" 48 | if [ "${stage_status}" == "Failed" ]; then exit 1; fi 49 | } 50 | 51 | copy_unpack_zip() { 52 | local source_artifact=${1} 53 | local dest_prefix=${2} 54 | 55 | echo "Unpacking ${source_artifact} to ${dest_prefix}" 56 | aws s3 cp ${source_artifact} ./temporary.zip 57 | mkdir stage 58 | pushd stage; unzip ../temporary.zip; popd 59 | aws s3 sync stage/ ${dest_prefix} 60 | rm -rf stage temporary.zip 61 | } 62 | 63 | copy_and_upload() { 64 | local source_artifact=${1} 65 | local dest_artifact=${2} 66 | local filename=${3} 67 | 68 | aws s3 cp ${source_artifact} ${filename} 69 | aws s3 cp ${filename} ${dest_artifact} 70 | rm ${filename} 71 | } 72 | 73 | copy_test_data() { 74 | local artifact_bucket=${1} 75 | local artifact_key_prefix=${2} 76 | local pipe_stackname=${3} 77 | 78 | local data_lake_bucket=$(aws cloudformation describe-stacks --stack-name ${pipe_stackname} --query 'Stacks[].Outputs[?OutputKey==`DataLakeBucket`].OutputValue' --output text) 79 | 80 | 81 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcga-clinical.zip s3://${data_lake_bucket}/ 82 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcga-cnv.zip s3://${data_lake_bucket}/ 83 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcga-expression.zip s3://${data_lake_bucket}/ 84 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcga-mutation.zip s3://${data_lake_bucket}/ 85 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcia-metadata.zip s3://${data_lake_bucket}/ 86 | copy_unpack_zip s3://${artifact_bucket}/${artifact_key_prefix}/tcga/tcga-summary.zip s3://${data_lake_bucket}/ 87 | 88 | copy_and_upload s3://${artifact_bucket}/${artifact_key_prefix}/annotation/clinvar/clinvar.vcf.gz s3://${data_lake_bucket}/annotation/vcf/clinvar/clinvar.vcf.gz clinvar.vcf.gz 89 | copy_and_upload s3://${artifact_bucket}/${artifact_key_prefix}/variants/vcf/variants.vcf.gz s3://${data_lake_bucket}/variants/vcf/variants.vcf.gz variants.vcf.gz 90 | copy_and_upload s3://${artifact_bucket}/${artifact_key_prefix}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz s3://${data_lake_bucket}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz 91 | copy_and_upload s3://${artifact_bucket}/${artifact_key_prefix}/references/hg38/Homo_sapiens_assembly38.fasta s3://${data_lake_bucket}/references/hg38/Homo_sapiens_assembly38.fasta Homo_sapiens_assembly38.fasta 92 | } 93 | 94 | setup() { 95 | 96 | local resource_prefix=$1 97 | local artifact_bucket=$2 98 | local artifact_key_prefix=$3 99 | 100 | local dir_prefix="GenomicsAnalysis" 101 | 102 | local zone_dir="${dir_prefix}Zone" 103 | local pipe_dir="${dir_prefix}Pipe" 104 | local code_dir="${dir_prefix}Code" 105 | 106 | local zone_stackname=${resource_prefix}-LandingZone 107 | local pipe_stackname=${resource_prefix}-Pipeline 108 | 109 | # Create stacks 110 | create_stack "${zone_stackname}" "${zone_dir}/zone_cfn.yml" "${resource_prefix}" 111 | create_stack "${pipe_stackname}" "${pipe_dir}/pipe_cfn.yml" 
"${resource_prefix}" 112 | 113 | # Clone and commit resources 114 | cd "${pipe_dir}"; clone_and_commit "${zone_stackname}"; cd .. 115 | cd "${code_dir}"; clone_and_commit "${pipe_stackname}"; 116 | 117 | # Get the last commit id 118 | commit_id=$(git log -1 --pretty=format:%H) 119 | cd .. 120 | 121 | # Get pipeline name 122 | pipeline_name=$(aws cloudformation describe-stack-resource --stack-name ${pipe_stackname} --logical-resource-id CodePipeline --query 'StackResourceDetail.PhysicalResourceId') 123 | 124 | # Copy Test Data 125 | copy_test_data "${artifact_bucket}" "${artifact_key_prefix}" "${pipe_stackname}" 126 | 127 | # Wait for pipeline execution using commit id 128 | wait_for_pipeline "${pipeline_name}" "${commit_id}" 129 | 130 | # Run Crawlers for TCGA data 131 | "${code_dir}/run_crawlers.sh" "${resource_prefix}" 132 | } 133 | 134 | project_name=${PROJECT_NAME:-GenomicsAnalysis} 135 | 136 | setup "$project_name" "${ARTIFACT_BUCKET}" "${ARTIFACT_KEY_PREFIX}" 137 | -------------------------------------------------------------------------------- /source/setup_cfn.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | 3 | Description: | 4 | (SO0077) - This Guidance for Multi-Omics and Multi-Modal Data Integration and Analysis on AWS creates a scalable environment in AWS to prepare genomic, clinical, mutation, expression and imaging data for large-scale analysis and perform interactive queries against a data lake. This solution demonstrates how to 1)Provision Amazon Omics resources to ingest, store and query genomics data, 2) provision serverless data ingestion pipelines for multi-modal data preparation and cataloging, 3) visualize and explore clinical data through an interactive interface, and 4) run interactive analytic queries against a multi-modal data lake. Version v3.0.0. 5 | 6 | Mappings: 7 | Send: 8 | AnonymousUsage: 9 | Data: Yes 10 | SourceCode: 11 | General: 12 | S3Bucket: '%%BUCKET_NAME%%' 13 | KeyPrefix: '%%SOLUTION_NAME%%/%%VERSION%%' 14 | 15 | Parameters: 16 | Project: 17 | Type: String 18 | Description: > 19 | The project name for this solution. The project name will be used to prefix resources created by this solution. The solution Glue database name and Athena workgroup name will be the project name. Project names should be unique to a project. 20 | AllowedPattern: "[a-zA-Z0-9-]{3,24}" 21 | ConstraintDescription: > 22 | Project name should be unique, 3-24 characters in length, and only have alphanumeric characters and hyphens ([a-zA-Z0-9-]{3,32}). 23 | Default: GenomicsAnalysis 24 | 25 | Resources: 26 | Setup: 27 | Type: Custom::Setup 28 | DependsOn: 29 | - CodeBuild 30 | Version: 1.0 31 | Properties: 32 | ServiceToken: !Sub ${SetupLambda.Arn} 33 | CodeBuildProjectName: !Sub ${CodeBuild} 34 | 35 | SetupLambda: 36 | Type: AWS::Lambda::Function 37 | DependsOn: 38 | - SetupLambdaRole 39 | Properties: 40 | Handler: lambda.handler 41 | Runtime: python3.8 42 | FunctionName: !Sub ${Project}Setup 43 | Code: 44 | S3Bucket: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 45 | S3Key: !Join ["", [!FindInMap ["SourceCode", "General", "KeyPrefix"], "/SolutionSetup.zip"]] 46 | Role: !Sub ${SetupLambdaRole.Arn} 47 | Timeout: 600 48 | Metadata: 49 | cfn_nag: 50 | rules_to_suppress: 51 | - id: W58 52 | reason: Bug in CfnNag. 53 | - id: W89 54 | reason: Lambda only used on setup. 55 | - id: W92 56 | reason: No need for concurrent execution. 
57 | SetupLambdaRole: 58 | Type: AWS::IAM::Role 59 | DependsOn: 60 | - CodeBuild 61 | Properties: 62 | AssumeRolePolicyDocument: 63 | Version: 2012-10-17 64 | Statement: 65 | - Action: 66 | - sts:AssumeRole 67 | Effect: Allow 68 | Principal: 69 | Service: 70 | - lambda.amazonaws.com 71 | Path: / 72 | Policies: 73 | - PolicyName: LogsAccess 74 | PolicyDocument: 75 | Statement: 76 | - Effect: Allow 77 | Action: 78 | - logs:CreateLogGroup 79 | - logs:CreateLogStream 80 | - logs:PutLogEvents 81 | Resource: 82 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${Project}* 83 | - PolicyName: CodeBuildAccess 84 | PolicyDocument: 85 | Statement: 86 | - Effect: Allow 87 | Action: 88 | - codebuild:BatchGetProjects 89 | - codebuild:BatchGetBuilds 90 | - codebuild:StartBuild 91 | Resource: 92 | - !Sub ${CodeBuild.Arn} 93 | - PolicyName: EventsAccess 94 | PolicyDocument: 95 | Statement: 96 | - Effect: Allow 97 | Action: 98 | - events:DeleteRule 99 | - events:PutRule 100 | - events:PutTargets 101 | - events:RemoveTargets 102 | Resource: 103 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/Setup* 104 | - PolicyName: LambdaAccess 105 | PolicyDocument: 106 | Statement: 107 | - Effect: Allow 108 | Action: 109 | - lambda:AddPermission 110 | - lambda:RemovePermission 111 | Resource: 112 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 113 | 114 | CodeBuildRole: 115 | Type: AWS::IAM::Role 116 | Properties: 117 | AssumeRolePolicyDocument: 118 | Version: 2012-10-17 119 | Statement: 120 | - Action: 121 | - sts:AssumeRole 122 | Effect: Allow 123 | Principal: 124 | Service: 125 | - codebuild.amazonaws.com 126 | Path: / 127 | Policies: 128 | - PolicyName: CloudFormationAccess 129 | PolicyDocument: 130 | Statement: 131 | - Action: 132 | - cloudformation:CreateStack 133 | - cloudformation:DescribeStacks 134 | - cloudformation:DescribeStackResource 135 | - cloudformation:DescribeStackResources 136 | - cloudformation:UpdateStack 137 | - cloudformation:DeleteStack 138 | - cloudformation:UpdateTerminationProtection 139 | Effect: Allow 140 | Resource: !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${Project}* 141 | - PolicyName: LogsAccess 142 | PolicyDocument: 143 | Statement: 144 | - Effect: Allow 145 | Action: 146 | - logs:CreateLogGroup 147 | - logs:CreateLogStream 148 | - logs:PutLogEvents 149 | Resource: 150 | - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/${Project}* 151 | - PolicyName: IAMAccess 152 | PolicyDocument: 153 | Statement: 154 | - Effect: Allow 155 | Action: 156 | - iam:CreateRole 157 | - iam:DeleteRole 158 | - iam:PutRolePolicy 159 | - iam:DeleteRolePolicy 160 | - iam:AttachRolePolicy 161 | - iam:DetachRolePolicy 162 | - iam:UpdateAssumeRolePolicy 163 | - iam:PassRole 164 | - iam:GetRole 165 | - iam:GetInstanceProfile 166 | - iam:CreateInstanceProfile 167 | - iam:DeleteInstanceProfile 168 | - iam:AddRoleToInstanceProfile 169 | - iam:RemoveRoleFromInstanceProfile 170 | Resource: 171 | - !Sub arn:aws:iam::${AWS::AccountId}:role/${Project}* 172 | - !Sub arn:aws:iam::${AWS::AccountId}:instance-profile/${Project}* 173 | - !Sub arn:aws:iam::${AWS::AccountId}:role/aws-quicksight-service-role-v0 174 | - PolicyName: CodeBuildAccess 175 | PolicyDocument: 176 | Statement: 177 | - Effect: Allow 178 | Action: 179 | - codebuild:CreateProject 180 | - codebuild:UpdateProject 181 | - codebuild:ListProjects 182 | - codebuild:BatchGetProjects 183 | - codebuild:DeleteProject 184 | Resource: 185 | - !Sub 
arn:aws:codebuild:${AWS::Region}:${AWS::AccountId}:project/${Project}* 186 | - PolicyName: CodePipelineAccess 187 | PolicyDocument: 188 | Statement: 189 | - Effect: Allow 190 | Action: 191 | - codepipeline:CreatePipeline 192 | - codepipeline:GetPipeline 193 | - codepipeline:UpdatePipeline 194 | - codepipeline:DeletePipeline 195 | - codepipeline:GetPipelineState 196 | - codepipeline:ListPipelineExecutions 197 | Resource: 198 | - !Sub arn:aws:codepipeline:${AWS::Region}:${AWS::AccountId}:${Project}* 199 | - PolicyName: CodeCommitAccess 200 | PolicyDocument: 201 | Statement: 202 | - Effect: Allow 203 | Action: 204 | - codecommit:CreateBranch 205 | - codecommit:CreateRepository 206 | - codecommit:GetRepository 207 | - codecommit:DeleteRepository 208 | - codecommit:CreateCommit 209 | - codecommit:GitPush 210 | - codecommit:GitPull 211 | - codecommit:DeleteBranch 212 | Resource: 213 | - !Sub arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:${Project}* 214 | - Effect: Allow 215 | Action: 216 | - codecommit:ListRepositories 217 | Resource: '*' 218 | - PolicyName: EventsAccess 219 | PolicyDocument: 220 | Statement: 221 | - Effect: Allow 222 | Action: 223 | - events:DescribeRule 224 | - events:PutRule 225 | - events:DeleteRule 226 | - events:PutTargets 227 | - events:RemoveTargets 228 | Resource: 229 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/* 230 | - PolicyName: GlueAccess 231 | PolicyDocument: 232 | Statement: 233 | - Effect: Allow 234 | Action: 235 | - glue:StartCrawler 236 | - glue:GetCrawlers 237 | - glue:StartWorkflowRun 238 | Resource: '*' 239 | - PolicyName: LambdaAccess 240 | PolicyDocument: 241 | Statement: 242 | - Effect: Allow 243 | Action: 244 | - lambda:GetFunction 245 | - lambda:CreateFunction 246 | - lambda:DeleteFunction 247 | - lambda:InvokeFunction 248 | Resource: 249 | - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${Project}* 250 | - PolicyName: S3Access 251 | PolicyDocument: 252 | Statement: 253 | - Effect: Allow 254 | Action: 255 | - s3:GetObject 256 | Resource: 257 | !Join 258 | - '' 259 | - - 'arn:aws:s3:::' 260 | - !Join 261 | - '-' 262 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 263 | - Ref: "AWS::Region" 264 | - '/*' 265 | - Effect: Allow 266 | Action: 267 | - s3:ListBucket 268 | Resource: 269 | !Join 270 | - '' 271 | - - 'arn:aws:s3:::' 272 | - !Join 273 | - '-' 274 | - - !FindInMap ["SourceCode", "General", "S3Bucket"] 275 | - Ref: "AWS::Region" 276 | 277 | - Effect: Allow 278 | Action: 279 | - s3:PutObjectAcl 280 | - s3:GetObject 281 | - s3:PutObject 282 | - s3:DeleteObject 283 | - s3:ListBucket 284 | - s3:CreateBucket 285 | - s3:DeleteBucket 286 | - s3:PutEncryptionConfiguration 287 | - s3:PutBucketPublicAccessBlock 288 | - s3:PutBucketLogging 289 | - s3:PutBucketAcl 290 | - s3:PutBucketOwnershipControls 291 | Resource: 292 | - arn:aws:s3:::*pipe* 293 | - arn:aws:s3:::*pipe*/* 294 | - Effect: Allow 295 | Action: 296 | - s3:CreateBucket 297 | - s3:DeleteBucket 298 | - s3:ListBucket 299 | - s3:PutEncryptionConfiguration 300 | - s3:PutBucketPublicAccessBlock 301 | - s3:PutBucketLogging 302 | - s3:PutBucketAcl 303 | - s3:PutObject 304 | - s3:PutObjectAcl 305 | - s3:PutBucketOwnershipControls 306 | Resource: 307 | - arn:aws:s3:::*pipe* 308 | - arn:aws:s3:::*pipe*/* 309 | 310 | Metadata: 311 | cfn_nag: 312 | rules_to_suppress: 313 | - id: W11 314 | reason: Star required for codecommit:ListRepositories and Glue actions. 
315 | 316 | CodeBuild: 317 | Type: AWS::CodeBuild::Project 318 | Properties: 319 | Name: !Sub ${Project}Setup 320 | Artifacts: 321 | Type: NO_ARTIFACTS 322 | Source: 323 | Type: NO_SOURCE 324 | BuildSpec: !Sub | 325 | version: 0.2 326 | phases: 327 | install: 328 | commands: 329 | - git config --global user.name automated_user 330 | - git config --global user.email automated_email 331 | - git config --global credential.helper '!aws codecommit credential-helper $@' 332 | - git config --global credential.UseHttpPath true 333 | - aws s3 cp s3://$ARTIFACT_BUCKET/$ARTIFACT_KEY_PREFIX/Solution.zip . 334 | - unzip Solution.zip 335 | - ./$SOLUTION_ACTION.sh 336 | Environment: 337 | ComputeType: BUILD_GENERAL1_SMALL 338 | EnvironmentVariables: 339 | - Name: SOLUTION_ACTION 340 | Value: setup 341 | - Name: PROJECT_NAME 342 | Value: !Ref Project 343 | - Name: ARTIFACT_BUCKET 344 | Value: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] 345 | - Name: ARTIFACT_KEY_PREFIX 346 | Value: !FindInMap ["SourceCode", "General", "KeyPrefix"] 347 | Image: aws/codebuild/standard:6.0 348 | Type: LINUX_CONTAINER 349 | ServiceRole: !Sub ${CodeBuildRole} 350 | TimeoutInMinutes: 60 351 | Metadata: 352 | cfn_nag: 353 | rules_to_suppress: 354 | - id: W32 355 | reason: Customer can enable encryption if desired. 356 | -------------------------------------------------------------------------------- /source/teardown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export AWS_DEFAULT_OUTPUT=text 4 | 5 | export RESOURCE_PREFIX=${PROJECT_NAME:-GenomicsAnalysis} 6 | export RESOURCE_PREFIX_LOWERCASE=$(echo ${RESOURCE_PREFIX} | tr '[:upper:]' '[:lower:]') 7 | 8 | export ZONE_STACKNAME=${RESOURCE_PREFIX}-LandingZone 9 | export PIPE_STACKNAME=${RESOURCE_PREFIX}-Pipeline 10 | export GENE_STACKNAME=${RESOURCE_PREFIX}-Genomics 11 | export IMG_STACKNAME=${RESOURCE_PREFIX}-Imaging 12 | export QS_STACKNAME=${RESOURCE_PREFIX}-Quicksight 13 | export OMICS_STACKNAME=${RESOURCE_PREFIX}-Omics 14 | export REPOSITORY_NAME=${RESOURCE_PREFIX_LOWERCASE} 15 | 16 | HAS_QS_STACK=$(aws cloudformation describe-stacks --stack-name ${QS_STACKNAME} && echo 1) 17 | HAS_GENE_STACK=$(aws cloudformation describe-stacks --stack-name ${GENE_STACKNAME} && echo 1) 18 | HAS_IMG_STACK=$(aws cloudformation describe-stacks --stack-name ${IMG_STACKNAME} && echo 1) 19 | HAS_OMICS_STACK=$(aws cloudformation describe-stacks --stack-name ${OMICS_STACKNAME} && echo 1) 20 | HAS_PIPE_STACK=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} && echo 1) 21 | HAS_ZONE_STACK=$(aws cloudformation describe-stacks --stack-name ${ZONE_STACKNAME} && echo 1) 22 | 23 | set -e 24 | 25 | # Clear Buckets 26 | 27 | if [[ -n $HAS_PIPE_STACK ]]; then 28 | BUILD_BUCKET=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`BuildBucket`].OutputValue'); echo ${BUILD_BUCKET} 29 | RESOURCES_BUCKET=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`ResourcesBucket`].OutputValue'); echo ${RESOURCES_BUCKET} 30 | DATALAKE_BUCKET=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`DataLakeBucket`].OutputValue'); echo ${DATALAKE_BUCKET} 31 | LOGS_BUCKET=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`LogsBucket`].OutputValue'); echo ${LOGS_BUCKET} 32 | 33 | [[ -n 
$BUILD_BUCKET ]] && aws s3 rm --recursive s3://${BUILD_BUCKET}/ 34 | [[ -n $RESOURCES_BUCKET ]] && aws s3 rm --recursive s3://${RESOURCES_BUCKET}/ 35 | [[ -n $DATALAKE_BUCKET ]] && aws s3 rm --recursive s3://${DATALAKE_BUCKET}/ 36 | [[ -n $LOGS_BUCKET ]] && aws s3 rm --recursive s3://${LOGS_BUCKET}/ 37 | fi 38 | 39 | # Disable Termination Protection on Stacks 40 | 41 | [[ -n $HAS_PIPE_STACK ]] && aws cloudformation update-termination-protection --no-enable-termination-protection --stack-name ${PIPE_STACKNAME} 42 | [[ -n $HAS_ZONE_STACK ]] && aws cloudformation update-termination-protection --no-enable-termination-protection --stack-name ${ZONE_STACKNAME} 43 | 44 | # Get Repo Names from Stacks 45 | 46 | PIPE_REPO=$(aws cloudformation describe-stacks --stack-name ${ZONE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`RepoName`].OutputValue'); echo ${PIPE_REPO} 47 | CODE_REPO=$(aws cloudformation describe-stacks --stack-name ${PIPE_STACKNAME} --query 'Stacks[].Outputs[?OutputKey==`RepoName`].OutputValue'); echo ${CODE_REPO} 48 | 49 | # Delete Stacks 50 | 51 | if [[ -n $HAS_QS_STACK ]]; then 52 | aws cloudformation delete-stack --stack-name ${QS_STACKNAME} 53 | aws cloudformation wait stack-delete-complete --stack-name ${QS_STACKNAME} 54 | fi 55 | if [[ -n $HAS_IMG_STACK ]]; then 56 | aws cloudformation delete-stack --stack-name ${IMG_STACKNAME} 57 | aws cloudformation wait stack-delete-complete --stack-name ${IMG_STACKNAME} 58 | fi 59 | if [[ -n $HAS_GENE_STACK ]]; then 60 | aws cloudformation delete-stack --stack-name ${GENE_STACKNAME} 61 | aws cloudformation wait stack-delete-complete --stack-name ${GENE_STACKNAME} 62 | fi 63 | if [[ -n $HAS_OMICS_STACK ]]; then 64 | aws cloudformation delete-stack --stack-name ${OMICS_STACKNAME} 65 | aws cloudformation wait stack-delete-complete --stack-name ${OMICS_STACKNAME} 66 | fi 67 | if [[ -n $HAS_PIPE_STACK ]]; then 68 | aws cloudformation delete-stack --stack-name ${PIPE_STACKNAME} 69 | aws cloudformation wait stack-delete-complete --stack-name ${PIPE_STACKNAME} 70 | if [[ -n $LOGS_BUCKET ]]; then 71 | aws s3 rm --recursive s3://${LOGS_BUCKET}/ 72 | sleep 1 73 | aws s3 rb s3://${LOGS_BUCKET} 74 | fi 75 | fi 76 | if [[ -n $HAS_ZONE_STACK ]]; then 77 | aws cloudformation delete-stack --stack-name ${ZONE_STACKNAME} 78 | aws cloudformation wait stack-delete-complete --stack-name ${ZONE_STACKNAME} 79 | fi 80 | 81 | # Delete Repos 82 | 83 | [[ -n $PIPE_REPO ]] && aws codecommit delete-repository --repository-name ${PIPE_REPO} 84 | [[ -n $CODE_REPO ]] && aws codecommit delete-repository --repository-name ${CODE_REPO} 85 | 86 | # Cleanup Local Git Repo 87 | 88 | find . \( -name ".git" -o -name ".gitignore" -o -name ".gitmodules" -o -name ".gitattributes" \) -exec rm -rf -- {} + 89 | --------------------------------------------------------------------------------
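For orientation, the following is a minimal sketch of how setup.sh and teardown.sh could be driven by hand, mirroring what the CodeBuild project defined in setup_cfn.yml does after unzipping Solution.zip. It assumes the AWS CLI is configured for the target account and region, that the commands are run from the directory containing the scripts and the GenomicsAnalysis* folders, and that the artifact bucket and key prefix already hold the staged solution assets; the bucket and prefix values shown are hypothetical placeholders, not names created by this guidance.

# Hypothetical values; in a deployed stack these are injected as CodeBuild
# environment variables (PROJECT_NAME, ARTIFACT_BUCKET, ARTIFACT_KEY_PREFIX).
export PROJECT_NAME=GenomicsAnalysis
export ARTIFACT_BUCKET=my-staging-bucket-us-east-1
export ARTIFACT_KEY_PREFIX=my-solution/v3.0.0

# Provision: creates the LandingZone and Pipeline stacks, seeds the CodeCommit
# repositories, copies the TCGA/TCIA test data, waits for the CodePipeline
# execution to finish, then starts the Glue crawlers.
./setup.sh

# Clean up: empties the solution buckets, then deletes the stacks and repositories.
./teardown.sh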