├── screenshots
│   ├── IAMUsers.png
│   ├── AssetView.jpg
│   ├── Architecture.png
│   ├── AthenaQuery.png
│   ├── DataPortalURL.png
│   ├── WhereToAthena.jpg
│   ├── AddDomainExecRole.jpg
│   ├── AttachPermissions.png
│   ├── DataLakeBlueprint.png
│   ├── DomainAssociation.png
│   ├── DataZoneSourceQueries.png
│   └── ehds_technical_figure1.png
├── medical_assets
│   └── patients.parquet
├── CODE_OF_CONDUCT.md
├── LICENSE
├── CONTRIBUTING.md
├── README.md
├── secondaccount.yaml
└── mainaccount.yaml

/screenshots/IAMUsers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/IAMUsers.png
--------------------------------------------------------------------------------
/screenshots/AssetView.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/AssetView.jpg
--------------------------------------------------------------------------------
/screenshots/Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/Architecture.png
--------------------------------------------------------------------------------
/screenshots/AthenaQuery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/AthenaQuery.png
--------------------------------------------------------------------------------
/screenshots/DataPortalURL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/DataPortalURL.png
--------------------------------------------------------------------------------
/screenshots/WhereToAthena.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/WhereToAthena.jpg
--------------------------------------------------------------------------------
/medical_assets/patients.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/medical_assets/patients.parquet
--------------------------------------------------------------------------------
/screenshots/AddDomainExecRole.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/AddDomainExecRole.jpg
--------------------------------------------------------------------------------
/screenshots/AttachPermissions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/AttachPermissions.png
-------------------------------------------------------------------------------- /screenshots/DataLakeBlueprint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/DataLakeBlueprint.png -------------------------------------------------------------------------------- /screenshots/DomainAssociation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/DomainAssociation.png -------------------------------------------------------------------------------- /screenshots/DataZoneSourceQueries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/DataZoneSourceQueries.png -------------------------------------------------------------------------------- /screenshots/ehds_technical_figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use/main/screenshots/ehds_technical_figure1.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. 
Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Using AWS for EHDS: a technical guide to building a data platform for secure health data use 2 | 3 | ## Project overview 4 | This repository contains the code and CloudFormation template to deploy the solution described in the [blog post](https://aws.amazon.com/blogs/publicsector/using-aws-for-ehds-a-technical-guide-to-building-a-secure-health-data-platform/). The post walks you through the technical implementation details for building a federated data governance and analysis platform using AWS. Whether you are a healthcare organization, technology provider, or systems integrator, this post aims to equip you with the technical knowledge to build a secure data platform for secondary usage of health data in alignment with the EHDS Regulation. At the core of this implementation is [Amazon DataZone](https://aws.amazon.com/datazone/), an out-of-the-box data management service that offers a broad set of fine-grained access controls and data governance configurations. As illustrated in Figure 1, this enables secure data sharing and collaboration across multiple AWS accounts, teams, and departments. 5 | 6 | ![General Architecture Overview](./screenshots/ehds_technical_figure1.png) 7 | 8 | ## Considerations 9 | 10 | The EHDS Regulation aims to provide a technology-agnostic framework to enable data sharing across systems. Although connections to external and third-party sources are supported through [AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html), the integration across different technological stacks requires more customized approaches and is out of the scope of this post. Hence, this post outlines how to build a secure data platform for secondary usage of health data across AWS environments. 11 | 12 | ## AWS services mapped to the needs deriving from EHDS 13 | 14 | The EHDS envisions a structured approach to health data management across EU Member States, involving various stakeholders such as ministries of health, regional healthcare agencies, research institutions, healthcare providers, and pharmaceutical companies. The implementation specifics may vary by Member State. 15 | 16 | AWS services can support each data journey stage for implementing the EHDS. Initial data handling (extraction, collection, streaming) uses [Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html), AWS Glue, and [Amazon EventBridge](https://aws.amazon.com/pm/eventbridge) for GDPR-compliant storage and event-driven processing. Data aggregation and transformation rely on AWS Glue and [Amazon SageMaker](https://aws.amazon.com/sagemaker/) for standardization and pseudonymization, while Amazon DataZone and AWS Identity and Access Management (IAM) enable secure cross-environment trust association and access management. 17 | 18 | For secondary use, [Amazon Athena](https://docs.aws.amazon.com/athena/latest/ug/what-is.html) and Amazon DataZone facilitate efficient data discovery and querying. DataZone features support provider interactions, governance, and metadata management, while [AWS CloudFormation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/Welcome.html) ensures scalable and consistent GDPR-compliant environments through infrastructure as code (IaC). [AWS CloudTrail](https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-user-guide.html) provides comprehensive access monitoring and audit capabilities, with DataZone maintaining data quality tracking and lineage.
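To make the audit capability concrete, the following is a minimal boto3 sketch (not part of the deployed solution) that lists recent Amazon DataZone management events recorded by CloudTrail; the Region and time window are placeholder choices:

```
import boto3
from datetime import datetime, timedelta, timezone

# Sketch only: list recent Amazon DataZone management events from CloudTrail.
cloudtrail = boto3.client("cloudtrail", region_name="eu-central-1")  # placeholder Region

events = cloudtrail.lookup_events(
    LookupAttributes=[
        {"AttributeKey": "EventSource", "AttributeValue": "datazone.amazonaws.com"}
    ],
    StartTime=datetime.now(timezone.utc) - timedelta(days=1),
    EndTime=datetime.now(timezone.utc),
)

for event in events["Events"]:
    print(event["EventTime"], event["EventName"])
```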
19 | 20 | ## Solution Architecture 21 | In this blog post, we provide two scenarios of how to implement this architecture: 22 | 23 | ![Solution Architecture](./screenshots/Architecture.png) 24 | 25 | > **_NOTE:_** The architecture and technical implementation are intended for demonstration purposes. 26 | 27 | ### Single account deployment (Scenario 1) 28 | This represents the primary governance account setup where an Amazon DataZone domain serves as the foundation for centralized data management. This scenario is well suited for developing and testing the Amazon DataZone capabilities. It creates a comprehensive data management infrastructure. 29 | 30 | ### Multi-account deployment (Scenario 2) 31 | This represents an extension of the main setup into a secondary AWS account. It maintains similar infrastructure components but operates in a separate account. This enables cross-account data sharing and allows for distributed data management while maintaining centralized discovery and access control. 32 | 33 | We will refer to the primary account configured in the single account deployment option as the governance account and the second account configured in the multi-account deployment as the producer account. 34 | 35 | The solution enhances the Amazon DataZone domain's centralized data governance with automated workflows for data transformation and publishing. When data is uploaded to an S3 bucket, it triggers an automation flow that crawls the data with an AWS Glue Crawler, adds it to the AWS Glue Data Catalog, and automatically triggers a Data Source run. This makes the data available for publishing as soon as it is added to the S3 bucket. 36 | 37 | 38 | ## Solution deployment 39 | 40 | We have prepared two CloudFormation templates that provision and configure the necessary AWS services. These templates provide two deployment options, one for each scenario. 41 | The instructions are separated into two implementations, depending on your preference and on whether you can deploy to multiple accounts. 42 | 43 | ### Prerequisites 44 | 45 | The following prerequisites are necessary to deploy this solution: 46 | 47 | - Access to at least one AWS account 48 | - Administrative IAM permissions for [AWS IAM](https://aws.amazon.com/iam/), [Amazon DataZone](https://aws.amazon.com/datazone/), [AWS Lake Formation](https://aws.amazon.com/lake-formation/), [Amazon S3](https://aws.amazon.com/s3/), [AWS Lambda](https://aws.amazon.com/lambda/), [AWS Glue](https://aws.amazon.com/glue/). 49 | 50 | Note: While the second deployment option uses two AWS accounts, neither deployment option requires an AWS Organizations setup. By default, all data at rest is encrypted using AWS-managed keys through AWS Key Management Service (KMS). You have the flexibility to use customer-managed keys if you prefer more control over your encryption settings. 51 | 52 | ### Single account deployment 53 | This section will walk you through the steps to configure the governance account. 54 | 55 | 1. Clone the repository on GitHub using git. You can also manually download the CloudFormation templates from GitHub. 56 | ``` 57 | git clone https://github.com/aws-samples/a-technical-guide-to-building-a-data-platform-for-secure-health-data-use.git 58 | ``` 59 | 2.
In the governance account, go to the CloudFormation console within your chosen Region. Under the Stacks section, choose the Create Stack dropdown and choose With new resources (standard). 60 | 3. Leave the prerequisites section with the default settings. Under the Specify template section, choose Upload a template file. Upload the *mainaccount.yaml* file. 61 | 4. Give a Stack name of your preference. 62 | 5. Choose Next. Leave everything default and choose the I acknowledge that AWS CloudFormation might create IAM resources checkbox. 63 | 6. Choose Next. Review the settings and choose Submit. 64 | 65 | After a few minutes, the deployment is complete and you should see an Amazon DataZone domain created. You can also get the data portal’s URL from the outputs of the CloudFormation stack. 66 | 67 | The single account deployment is now complete. You can test the solution by moving to the section: Upload and publish data. 68 | 69 | If you would like to deploy a multi-account environment, follow this next section for setting up and configuring the multi-account deployment. 70 | 71 | ### Multi-account deployment 72 | This section will walk you through the steps to configure the producer account. You need to have completed the single account deployment steps to follow these instructions. 73 | 74 | 1. In the governance account, navigate to the Amazon SageMaker platform. On the dashboard, choose View existing domains for Amazon DataZone and choose the domain with the prefix: *DataZoneDomain-* 75 | 2. On the domain page, scroll down and on the first tab named “Account associations” choose Request association, as shown in the following figure. 76 | 77 | ![Amazon DataZone account association](./screenshots/DomainAssociation.png) 78 | 79 | 3. A new page named Associate account opens. Enter the AWS Account ID of the data producer account that you want to associate. 80 | 4. Under RAM Policy, make sure to choose *AWSRAMPermissionDataZonePortalReadWrite*. Choose Request Association, as shown in the following figure. 81 | 82 | ![Attach permissions to account association](./screenshots/AttachPermissions.png) 83 | 84 | You have now requested to associate the producer account to the domain. This allows members of the associated account to access the domain’s data portal and start publishing data. 85 | 86 | 5. Still in the governance account within the Amazon DataZone domain page, go to the tab User management, as shown in the following figure. 87 | 6. Choose IAM Users from the dropdown. Copy the ARN of the role that contains *DataZoneDomainExecutionRole*. You need it for a later step, so paste it in a text editor for now. 88 | 89 | ![Add DataZoneDomainExecutionRole](./screenshots/AddDomainExecRole.jpg) 90 | 91 | Next, you create the necessary resources and permissions to run the CloudFormation stack in the producer account. 92 | 93 | 7. Log in to your producer account, ensuring you are in the AWS Region where you deployed the Amazon DataZone domain in the governance account. 94 | 8. On the Amazon SageMaker Platform console, choose View requests. You will see the association request from the governance account. Choose the request and choose Review Request. 95 | 9. A new page opens named Accept & configure AWS association. Choose Accept new permissions. 96 | 10. Once the association has been created, choose the associated domain. Copy the IAM role displayed under the Data portal URL into your text editor, as shown here. 97 | 98 | ![Copy Data portal User ARN](./screenshots/DataPortalURL.png) 99 | 100 | 11.
Scroll down to the Default Blueprints section. Select the Default Data Lake option and choose Enable. On the next page, enable the option for hybrid mode in the Data location registration section. Leave everything else as default and choose Enable Blueprint. 101 | 102 | ![Enable Default Data Lake Blueprint](./screenshots/DataLakeBlueprint.png) 103 | 104 | Next you create an IAM role for a Lambda function allowing it to perform actions on the Amazon DataZone domain. 105 | 106 | 12. Still in the producer account, visit the IAM console. Choose Roles, then Create role. 107 | 13. Make sure AWS service is chosen. In the Service or use case dropdown, choose Lambda. 108 | 14. Choose Next. Don’t add any permissions, and choose Next again. 109 | 15. Give a name to this role, such as *Lambda-role*. Choose Create role. 110 | 16. After creation, search for your role and select it. It should not have any permissions for now. Choose Add permissions and Create inline policy. 111 | 17. Choose the JSON editor view and paste in the following policy. Make sure to set the empty Resource field to the ARN of the Amazon DataZone domain execution role that you copied in Step 6. 112 | 113 | ``` 114 | { 115 | "Version": "2012-10-17", 116 | "Statement": [ 117 | { 118 | "Effect": "Allow", 119 | "Action": "sts:AssumeRole", 120 | "Resource": "" 121 | } 122 | ] 123 | } 124 | ``` 125 | 126 | 18. Give the policy a name and choose Create policy. Lastly, copy the ARN of this newly created IAM role for your Lambda function and paste it into your text editor. 127 | 128 | You must make sure the producer account can access the domain, as the portal’s URL is still greyed out. 129 | 130 | 19. Log back in to your governance account and visit the Amazon DataZone console. Go to the Amazon DataZone domain under the User management tab, choose Add and Add IAM Users, as shown in the following figure. 131 | 132 | ![Add IAM users to the Amazon DataZone domain](./screenshots/IAMUsers.png) 133 | 134 | 20. Next, choose the IAM Account option, Associated account, and paste the IAM Role ARN copied from the Amazon DataZone Data portal dashboard in Step 10. Choose Add then Add user(s). 135 | 21. Visit the IAM console, choose Roles, and search for the role containing *DataZoneDomainExecutionRole*, whose ARN you copied in Step 6. 136 | 22. Choose the tab: Trust relationships and Edit trust policy. 137 | 23. Under Principal, add the following: *"AWS": ""*. This is the IAM Role ARN you copied in Step 18. Your trust policy should now look like the following: 138 | 139 | ``` 140 | { 141 | "Version": "2012-10-17", 142 | "Statement": [ 143 | { 144 | "Effect": "Allow", 145 | "Principal": { 146 | "Service": [ 147 | "datazone.amazonaws.com", 148 | "lakeformation.amazonaws.com", 149 | "lambda.amazonaws.com" 150 | ], 151 | "AWS": "" 152 | }, 153 | "Action": [ 154 | "sts:AssumeRole", 155 | "sts:TagSession" 156 | ] 157 | } 158 | ] 159 | } 160 | ``` 161 | 162 | 24. Choose Update policy. 163 | 164 | You are now ready to deploy the stack in the producer account! In the producer account, go to CloudFormation and follow the same steps as for the main account deployment. Make sure to deploy in the same Region as the domain. This time, upload the *secondaccount.yaml* template. This template needs the following parameters (a deployment sketch follows the list): 165 | 166 | - **AssumeRoleArn**: The ARN of the *DataZoneDomainExecutionRole* copied in Step 6. 167 | - **DataZoneDomainId**: The ID of the Amazon DataZone domain. You can find it in the Amazon DataZone console. The ARN of the domain is structured as follows: *arn:aws:datazone:REGION:ACCOUNT_ID:domain/DOMAIN_ID* 168 | - **HomeRegion**: The Region of the Amazon DataZone domain. 169 | - **LambdaRoleArn**: The ARN of the Lambda role copied in Step 18.
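For reference, the following is a minimal boto3 sketch of this deployment; the stack name, Region, and every parameter value are placeholders that you must replace with the ARNs, domain ID, and Region you noted in the previous steps:

```
import boto3

# Sketch only: deploy secondaccount.yaml from the producer account.
# Every value below is a placeholder; replace it with your own.
cfn = boto3.client("cloudformation", region_name="eu-central-1")  # same Region as the domain

with open("secondaccount.yaml") as f:
    template_body = f.read()

cfn.create_stack(
    StackName="ehds-producer",
    TemplateBody=template_body,
    Capabilities=["CAPABILITY_IAM"],  # the stack creates IAM resources
    Parameters=[
        {"ParameterKey": "AssumeRoleArn", "ParameterValue": "arn:aws:iam::111111111111:role/example-DataZoneDomainExecutionRole"},
        {"ParameterKey": "DataZoneDomainId", "ParameterValue": "dzd_exampleid1234"},
        {"ParameterKey": "HomeRegion", "ParameterValue": "eu-central-1"},
        {"ParameterKey": "LambdaRoleArn", "ParameterValue": "arn:aws:iam::222222222222:role/Lambda-role"},
    ],
)
cfn.get_waiter("stack_create_complete").wait(StackName="ehds-producer")
```

The CAPABILITY_IAM acknowledgement corresponds to the checkbox you select during the console-based deployment.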
170 | 171 | Wait for the stack to deploy and complete. The stack deploys the previously described automation components. You can now start testing the solution! 172 | 173 | ## Upload and publish data 174 | 175 | ### Upload data to Amazon S3 176 | 177 | If you are following the single account setup, then complete the following steps within your governance account. For the multi-account setup, complete the following steps in the producer (second) account: 178 | 179 | 1. In the Console search bar, enter "S3" to navigate to the Amazon S3 section of the Console. 180 | 2. Choose the bucket with the prefix *ehds-assets* 181 | 3. Choose Upload and choose Add folder. Choose the data inside the [medical_assets](./medical_assets) folder. Choose Upload. 182 | 183 | ### View data assets 184 | 185 | After uploading the data, navigate back to the Amazon SageMaker platform and to the DataZone associated domains dashboard. 186 | 1. In the Amazon DataZone domain, choose the link Data portal URL. This opens the Amazon DataZone domain’s Data portal. 187 | 2. A project has been pre-created for you. Choose the dropdown next to Select Project and choose the Project with the prefix *EHDS-*. 188 | 3. In the Project’s dashboard page, go to Data Sources. Choose the DataSource named *ehds-assets*. 189 | 4. The data source has successfully queried the AWS Glue table created from the data that you uploaded to the *ehds-assets* S3 bucket. Choose the asset link listed under Asset Name, as shown in the following figure. 190 | 191 | ![Amazon DataZone Data Source queries](./screenshots/DataZoneSourceQueries.png) 192 | 193 | 5. On the asset’s summary page, you see details such as metadata, lineage, and data quality. The data source was pre-configured to auto-publish the data asset. 194 | 195 | ### Search for published data assets 196 | 197 | In this section we use the catalog capability of Amazon DataZone, which allows you to search for published data assets and subscribe to them. 198 | 199 | 1. On the top bar of the Amazon DataZone Platform, choose the Catalog icon, as shown in the following figure. 200 | 201 | ![Asset view on the DataZone console](./screenshots/AssetView.jpg) 202 | 203 | 2. Within the Catalog view, choose the Amazon DataZone Domain from the left menu bar and view the data asset that has been published to the Domain’s metadata catalog. 204 | 205 | You can further filter by attributes such as data and asset type, owning project, source Region and account, and domain unit. 206 | 207 | ### Subscribe and query data assets 208 | 209 | If you have followed the multi-account deployment, then you can filter by source account. Choose the account ID of the account from which you have uploaded and published the data assets. Then, you can query the data with Amazon Athena. 210 | 211 | 1. Subscribe to the data asset by choosing the Subscribe button. 212 | 2. On the subscription page, enter a reason for subscribing, leave everything as default, and choose Request. The approval process has been automated in this solution, so your subscription request is auto-approved. By default, the owner of the data asset would need to approve each request manually. 213 | 3. Move back to the project overview page by choosing the project name on the top menu bar. 214 | 4. Within the project you have analytical tools listed on the right side of the overview page. Choose the option Query data (Amazon Athena), as shown in the following figure. 215 | 5. Choose Open Amazon Athena in the pop-up message. 216 | 217 | ![Where to query data with Athena](./screenshots/WhereToAthena.jpg) 218 | 219 | ### Query subscribed data assets with Athena 220 | 221 | Within the Athena Console, follow these steps to query the data to which you have subscribed: 222 | 1. From the left option bar, choose the Database with the suffix *_sub_db*. This lists the data assets to which you have subscribed. 223 | 2. Run SQL queries to view the data to which you have subscribed, for example as in the sketch that follows.
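As an example, assuming the sample Parquet file surfaced as a *patients* table, a minimal boto3 sketch of such a query could look like this; the database name and results location are placeholders:

```
import boto3

# Sketch only: query a subscribed asset through the Athena API.
# The database, table, and output location below are placeholders.
athena = boto3.client("athena")

response = athena.start_query_execution(
    QueryString='SELECT * FROM "example_project_sub_db"."patients" LIMIT 10;',
    QueryExecutionContext={"Database": "example_project_sub_db"},
    ResultConfiguration={"OutputLocation": "s3://example-athena-query-results/"},
)
print("Query execution ID:", response["QueryExecutionId"])
```

The same query can also be run directly in the Athena query editor.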
224 | 225 | By following these steps you have successfully uploaded, published, and subscribed to data assets, and queried the subscribed data using Athena. 226 | 227 | ![Athena query example and results](./screenshots/AthenaQuery.png) 228 | 229 | 230 | ## Security 231 | 232 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 233 | 234 | ## License 235 | 236 | This library is licensed under the MIT-0 License. See the LICENSE file. 237 | 238 | -------------------------------------------------------------------------------- /secondaccount.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: CloudFormation template for the producer (second) account of the EHDS reference architecture. 3 | Parameters: 4 | LambdaRoleArn: 5 | Type: String 6 | ConstraintDescription: Must be a valid ARN 7 | Description: The ARN of the Lambda role that you manually created 8 | 9 | AssumeRoleArn: 10 | Type: String 11 | ConstraintDescription: Must be a valid ARN 12 | Description: The ARN of the role in the governance account. You can retrieve it from the outputs of the governance account CloudFormation template 13 | 14 | HomeRegion: 15 | Type: String 16 | Description: AWS Region of the DataZone governance account 17 | AllowedValues: 18 | - eu-central-1 19 | - eu-west-1 20 | - eu-west-2 21 | - eu-north-1 22 | - us-east-1 23 | - us-east-2 24 | - us-west-2 25 | - ap-northeast-2 26 | - ap-southeast-1 27 | - ap-southeast-2 28 | - ap-northeast-1 29 | - ca-central-1 30 | - sa-east-1 31 | 32 | DataZoneDomainId: 33 | Type: String 34 | Description: ID of the DataZone domain in the DataZone governance account.
It starts with "dzd_" 35 | 36 | Resources: 37 | 38 | CentralAccountAssetsBucket: 39 | Type: AWS::S3::Bucket 40 | Properties: 41 | BucketName: !Sub ehds-assets-${AWS::AccountId}-${AWS::StackName} 42 | BucketEncryption: 43 | ServerSideEncryptionConfiguration: 44 | - ServerSideEncryptionByDefault: 45 | SSEAlgorithm: AES256 46 | NotificationConfiguration: 47 | LambdaConfigurations: 48 | - Event: s3:ObjectCreated:* 49 | Function: !GetAtt LambdaFunctionTriggerGlueCrawler.Arn 50 | 51 | 52 | AddLambdaPermissions: 53 | Type: AWS::IAM::ManagedPolicy 54 | Properties: 55 | PolicyDocument: 56 | Version: '2012-10-17' 57 | Statement: 58 | - Sid: AllowSSMParameterAccess 59 | Effect: Allow 60 | Action: 61 | - ssm:GetParameter 62 | - ssm:PutParameter 63 | - ssm:DeleteParameter 64 | - ssm:DescribeParameters 65 | Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/datazone/${AWS::StackName}/*' 66 | - Sid: AllowSSMParameterList 67 | Effect: Allow 68 | Action: 69 | - ssm:DescribeParameters 70 | Resource: '*' 71 | Roles: [!Select [1, !Split ["/", !Ref LambdaRoleArn]]] 72 | 73 | LambdaFunctionTriggerGlueCrawler: 74 | Type: AWS::Lambda::Function 75 | Properties: 76 | Code: 77 | ZipFile: |- 78 | # MIT No Attribution 79 | # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 80 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 81 | # software and associated documentation files (the "Software"), to deal in the Software 82 | # without restriction, including without limitation the rights to use, copy, modify, 83 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 84 | # permit persons to whom the Software is furnished to do so. 85 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 86 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 87 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 88 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 89 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 90 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 91 | import boto3 92 | import os 93 | import time 94 | def lambda_handler(event, context): 95 | glue = boto3.client('glue') 96 | lambda_client = boto3.client('lambda') 97 | lakeformation_client = boto3.client('lakeformation') 98 | crawler_name = os.environ.get('CRAWLER_NAME') 99 | role_arn = os.environ.get('ROLE_ARN') 100 | database_name = os.environ.get('DATABASE_NAME') 101 | catalog_id = os.environ.get('CATALOG_ID') 102 | try: 103 | response = glue.start_crawler(Name=crawler_name) 104 | time.sleep(90) 105 | return { 106 | 'statusCode': 200, 107 | 'body': 'Glue crawler started successfully.', 108 | 'headers': { 109 | 'Access-Control-Allow-Origin': '*', 110 | 'Access-Control-Allow-Methods': 'OPTIONS, POST, GET, PUT, DELETE', 111 | 'Access-Control-Allow-Headers': 'Content-Type', 112 | } 113 | } 114 | except Exception as e: 115 | error_message = f"Error starting Glue crawler '{crawler_name}': {str(e)}" 116 | print(error_message) 117 | return { 118 | 'statusCode': 500, 119 | 'body': error_message 120 | } 121 | Environment: 122 | Variables: 123 | CATALOG_ID: !Ref AWS::AccountId 124 | DATABASE_NAME: !Ref LakeFormationDatabase 125 | ROLE_ARN: !GetAtt LambdaRole.Arn 126 | CRAWLER_NAME: !Sub datasetcrawler-${AWS::StackName} 127 | FunctionName: !Sub LambdaTriggerGlueCrawler-${AWS::StackName} 128 | Handler: index.lambda_handler 129 | Role: !GetAtt LambdaRole.Arn 130 | Runtime: python3.13 131 | Timeout: 180 132 | LambdaRole: 133 | Type: AWS::IAM::Role 134 | Properties: 135 | AssumeRolePolicyDocument: 136 | Statement: 137 | - Action: sts:AssumeRole 138 | Effect: Allow 139 | Principal: 140 | Service: lambda.amazonaws.com 141 | Version: '2012-10-17' 142 | ManagedPolicyArns: 143 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole 144 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole 145 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaRole 146 | Policies: 147 | - PolicyName: S3AdminAccess 148 | PolicyDocument: 149 | Version: '2012-10-17' 150 | Statement: 151 | - Sid: S3BucketAccess 152 | Effect: Allow 153 | Action: 154 | - s3:CreateBucket 155 | - s3:DeleteBucket 156 | - s3:ListBucket 157 | - s3:GetBucketLocation 158 | - s3:GetBucketPolicy 159 | - s3:PutBucketPolicy 160 | - s3:DeleteBucketPolicy 161 | - s3:GetBucketAcl 162 | - s3:PutBucketAcl 163 | - s3:GetBucketVersioning 164 | - s3:PutBucketVersioning 165 | - s3:GetBucketPublicAccessBlock 166 | - s3:PutBucketPublicAccessBlock 167 | - s3:GetObject 168 | - s3:PutObject 169 | - s3:DeleteObject 170 | - s3:GetObjectVersion 171 | - s3:GetObjectAcl 172 | - s3:PutObjectAcl 173 | - s3:GetBucketEncryption 174 | - s3:PutBucketEncryption 175 | Resource: 176 | - !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName} 177 | - !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName}/* 178 | 179 | LambdaInvokePermission: 180 | Type: AWS::Lambda::Permission 181 | Properties: 182 | FunctionName: !Ref LambdaFunctionTriggerGlueCrawler 183 | Action: lambda:InvokeFunction 184 | Principal: s3.amazonaws.com 185 | SourceAccount: !Ref AWS::AccountId 186 | SourceArn: !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName} 187 | 188 | LakeFormationSettings: 189 | Type: AWS::LakeFormation::DataLakeSettings 190 | Properties: 191 | Admins: 192 | - DataLakePrincipalIdentifier: !GetAtt LambdaRole.Arn 193 | - DataLakePrincipalIdentifier: !GetAtt GlueRole.Arn 194 | - DataLakePrincipalIdentifier: !Ref LambdaRoleArn 195 | Parameters: 196 | CROSS_ACCOUNT_VERSION: '4' 197 | SET_CONTEXT: 'TRUE' 198 | 199 | GlueRole: 200 | Type: AWS::IAM::Role 201 | Properties: 202 | AssumeRolePolicyDocument: 203 | Statement: 204 | - Action: sts:AssumeRole 205 | Effect: Allow 206 | Principal: 207 | Service: 208 | - glue.amazonaws.com 209 | - lakeformation.amazonaws.com 210 | Version: '2012-10-17' 211 | 212 | ManagedPolicyArns: 213 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole 214 | Policies: 215 | - PolicyName: LakeFormationPermissions 216 | PolicyDocument: 217 | Version: '2012-10-17' 218 | Statement: 219 | - Effect: Allow 220 | Action: 221 | - lakeformation:GetDataAccess 222 | - lakeformation:GrantPermissions 223 | - lakeformation:RevokePermissions 224 | - lakeformation:ListPermissions 225 | - lakeformation:PutDataLakeSettings 226 | - lakeformation:GetResourceLFTags 227 | - lakeformation:ListLFTags 228 | - lakeformation:GetLFTag
229 | - lakeformation:SearchTablesByLFTags 230 | - lakeformation:AddLFTagsToResource 231 | - lakeformation:DeleteLFTag 232 | - lakeformation:RemoveLFTagsFromResource 233 | - lakeformation:SearchDatabasesByLFTags 234 | - lakeformation:UpdateLFTag 235 | - iam:ListUsers 236 | - iam:ListRoles 237 | - iam:GetRole 238 | - iam:GetRolePolicy 239 | Resource: '*' 240 | - PolicyName: S3Access 241 | PolicyDocument: 242 | Version: '2012-10-17' 243 | Statement: 244 | - Sid: S3BucketAccess 245 | Effect: Allow 246 | Action: 247 | - s3:List* 248 | - s3:Get* 249 | Resource: !GetAtt CentralAccountAssetsBucket.Arn 250 | - Sid: S3ObjectAccess 251 | Effect: Allow 252 | Action: 253 | - s3:GetObject 254 | - s3:PutObject 255 | - s3:DeleteObject 256 | Resource: !Sub ${CentralAccountAssetsBucket.Arn}/* 257 | 258 | LakeFormationDatabase: 259 | Type: AWS::Glue::Database 260 | Properties: 261 | CatalogId: !Ref AWS::AccountId 262 | DatabaseInput: 263 | Name: !Sub ehds-assets-${AWS::StackName} 264 | 265 | LakeFormationDataLakeLocation: 266 | Type: AWS::LakeFormation::Resource 267 | Properties: 268 | ResourceArn: !GetAtt CentralAccountAssetsBucket.Arn 269 | UseServiceLinkedRole: false 270 | HybridAccessEnabled: true 271 | RoleArn: !GetAtt GlueRole.Arn 272 | 273 | GlueLakeformationPermissions: 274 | Type: AWS::LakeFormation::Permissions 275 | Properties: 276 | DataLakePrincipal: 277 | DataLakePrincipalIdentifier: !GetAtt GlueRole.Arn 278 | Permissions: 279 | - ALL 280 | PermissionsWithGrantOption: 281 | - ALL 282 | Resource: 283 | DatabaseResource: 284 | CatalogId: !Ref AWS::AccountId 285 | Name: !Ref LakeFormationDatabase 286 | 287 | GlueCrawler: 288 | Type: AWS::Glue::Crawler 289 | Properties: 290 | DatabaseName: !Ref LakeFormationDatabase 291 | Name: !Sub datasetcrawler-${AWS::StackName} 292 | Role: !GetAtt GlueRole.Arn 293 | Targets: 294 | S3Targets: 295 | - Path: !Sub s3://${CentralAccountAssetsBucket} 296 | 297 | GlueCrawlerTrigger: 298 | Type: AWS::Glue::Trigger 299 | Properties: 300 | Actions: 301 | - CrawlerName: !Ref GlueCrawler 302 | Name: !Sub CrawlerTrigger-${AWS::StackName} 303 | Type: ON_DEMAND 304 | 305 | LakeformationLambdaPermissions: 306 | Type: AWS::LakeFormation::Permissions 307 | Properties: 308 | DataLakePrincipal: 309 | DataLakePrincipalIdentifier: !GetAtt LambdaRole.Arn 310 | Permissions: 311 | - ALL 312 | PermissionsWithGrantOption: 313 | - ALL 314 | Resource: 315 | DatabaseResource: 316 | CatalogId: !Ref AWS::AccountId 317 | Name: !Ref LakeFormationDatabase 318 | CentralAccountAssetsBucketPolicy: 319 | Type: AWS::S3::BucketPolicy 320 | Properties: 321 | Bucket: !Ref CentralAccountAssetsBucket 322 | PolicyDocument: 323 | Version: '2012-10-17' 324 | Statement: 325 | - Sid: BucketLevelPermissions 326 | Effect: Allow 327 | Principal: 328 | AWS: 329 | - !Sub arn:${AWS::Partition}:iam::${AWS::AccountId}:root 330 | Action: 331 | - s3:ListBucket 332 | - s3:GetBucketLocation 333 | - s3:PutBucketPolicy 334 | - s3:GetBucketPolicy 335 | Resource: !GetAtt CentralAccountAssetsBucket.Arn 336 | Condition: 337 | Bool: 338 | aws:SecureTransport: true 339 | - Sid: ObjectLevelPermissions 340 | Effect: Allow 341 | Principal: 342 | AWS: 343 | - !Sub arn:${AWS::Partition}:iam::${AWS::AccountId}:root 344 | Action: 345 | - s3:PutObject 346 | - s3:GetObject 347 | - s3:DeleteObject 348 | Resource: !Sub ${CentralAccountAssetsBucket.Arn}/* 349 | Condition: 350 | Bool: 351 | aws:SecureTransport: true 352 | LabProject: 353 | Type: AWS::DataZone::Project 354 | Properties: 355 | DomainIdentifier: !Ref DataZoneDomainId 356 | 
Name: !Sub EHDS-${AWS::StackName} 357 | Description: Project for EHDS data 358 | 359 | ProjectMemberExecutionRole: 360 | DependsOn: 361 | - LabProject 362 | Type: AWS::DataZone::ProjectMembership 363 | Properties: 364 | Designation: PROJECT_OWNER 365 | DomainIdentifier: !Ref DataZoneDomainId 366 | Member: 367 | UserIdentifier: !Ref AssumeRoleArn 368 | ProjectIdentifier: !GetAtt LabProject.Id 369 | 370 | CreateEnvironmentFunction: 371 | Type: AWS::Lambda::Function 372 | DependsOn: 373 | - LabProject 374 | - AddLambdaPermissions 375 | Properties: 376 | Handler: index.lambda_handler 377 | Role: !Ref LambdaRoleArn 378 | Code: 379 | ZipFile: | 380 | import boto3 381 | import json 382 | import cfnresponse 383 | import os 384 | 385 | def lambda_handler(event, context): 386 | 387 | request_type = event['RequestType'] 388 | if request_type == 'Delete': 389 | cfnresponse.send(event, context, cfnresponse.SUCCESS, {'Message': 'Successfully deleted'}) 390 | return { 391 | "statusCode": 200, 392 | } 393 | 394 | # Define the role ARN and session name 395 | role_arn = os.environ["ASSUME_ROLE_ARN"] 396 | session_name = "AssumeRoleSession" 397 | 398 | # Create the STS client 399 | sts_client = boto3.client('sts') 400 | 401 | # Assume the cross-account role 402 | assumed_role = sts_client.assume_role( 403 | RoleArn=role_arn, 404 | RoleSessionName=session_name 405 | ) 406 | 407 | # Extract temporary credentials 408 | credentials = assumed_role['Credentials'] 409 | 410 | # Use the assumed credentials to create a DataZone client 411 | datazone_client = boto3.client( 412 | 'datazone', 413 | aws_access_key_id=credentials['AccessKeyId'], 414 | aws_secret_access_key=credentials['SecretAccessKey'], 415 | aws_session_token=credentials['SessionToken'] 416 | ) 417 | 418 | try: 419 | # Find blueprint ID 420 | environmentblueprintID = datazone_client.list_environment_blueprints( 421 | domainIdentifier=os.environ['DOMAIN_ID'], 422 | managed=True, 423 | name="DefaultDataLake" 424 | ) 425 | 426 | environment_blueprint_id = environmentblueprintID["items"][0]["id"] 427 | 428 | # Create the environment profile 429 | 430 | profile_response = datazone_client.create_environment_profile( 431 | name=f"environment-profile-{os.environ['STACK_NAME']}", 432 | environmentBlueprintIdentifier=environment_blueprint_id, 433 | domainIdentifier=os.environ['DOMAIN_ID'], 434 | awsAccountId=os.environ['AWS_ACCOUNT_ID'], 435 | awsAccountRegion=os.environ['REGION'], 436 | projectIdentifier=os.environ['PROJECT_ID'] 437 | ) 438 | 439 | profile_id = profile_response['id'] 440 | 441 | # Create environment 442 | env_response = datazone_client.create_environment( 443 | name=f"environment-{os.environ['STACK_NAME']}", 444 | description="Environment for the project", 445 | domainIdentifier=os.environ['DOMAIN_ID'], 446 | projectIdentifier=os.environ['PROJECT_ID'], 447 | environmentProfileIdentifier=profile_id 448 | ) 449 | 450 | env_id = env_response['id'] 451 | 452 | # Create Data Source 453 | datasource_response = datazone_client.create_data_source( 454 | configuration={ 455 | 'glueRunConfiguration': { 456 | 'relationalFilterConfigurations': [ 457 | {'databaseName': os.environ['DATABASE_NAME']} 458 | ] 459 | } 460 | }, 461 | domainIdentifier=os.environ['DOMAIN_ID'], 462 | enableSetting='ENABLED', 463 | environmentIdentifier=env_id, 464 | name='ehds-assets', 465 | projectIdentifier=os.environ['PROJECT_ID'], 466 | publishOnImport=True, 467 | type='GLUE' 468 | ) 469 | 470 | datasource_id = datasource_response['id'] 471 | 472 | ssm = boto3.client('ssm') 473 |
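# The data source ID is persisted to SSM Parameter Store below so that the
# separate TriggerDataSourceRunFunction can retrieve it and start a new
# DataZone data source run whenever the Glue crawler finishes.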
474 | #Add to SSM 475 | ssm.put_parameter( 476 | Name=f'/datazone/{os.environ["STACK_NAME"]}/datasource_id', 477 | Value=datasource_id, 478 | Type='String', 479 | Overwrite=True 480 | ) 481 | 482 | # Return the response to CloudFormation 483 | responseData = {'Message': 'Environment successfully created'} 484 | cfnresponse.send(event, context, cfnresponse.SUCCESS, responseData) 485 | return { 486 | "statusCode": 200, 487 | } 488 | except Exception as e: 489 | responseData = {'Message': str(e)} 490 | print(e) 491 | cfnresponse.send(event, context, cfnresponse.FAILED, responseData) 492 | return { 493 | "statusCode": 500 494 | } 495 | 496 | 497 | Runtime: python3.13 498 | Timeout: 60 499 | Environment: 500 | Variables: 501 | ASSUME_ROLE_ARN: !Ref AssumeRoleArn 502 | DOMAIN_ID: !Ref DataZoneDomainId 503 | AWS_ACCOUNT_ID: !Ref AWS::AccountId 504 | REGION: !Ref AWS::Region 505 | PROJECT_ID: !GetAtt LabProject.Id 506 | STACK_NAME: !Ref AWS::StackName 507 | DATABASE_NAME: !Ref LakeFormationDatabase 508 | 509 | CreateEnvRun: 510 | Type: "AWS::CloudFormation::CustomResource" 511 | DependsOn: 512 | - CreateEnvironmentFunction 513 | Properties: 514 | ServiceToken: !GetAtt CreateEnvironmentFunction.Arn 515 | Region: !Ref AWS::Region 516 | 517 | TriggerDataSourceRunFunction: 518 | Type: AWS::Lambda::Function 519 | DependsOn: 520 | - AddLambdaPermissions 521 | Properties: 522 | Handler: index.handler 523 | Role: !Ref LambdaRoleArn 524 | Code: 525 | ZipFile: | 526 | # MIT No Attribution 527 | # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 528 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 529 | # software and associated documentation files (the "Software"), to deal in the Software 530 | # without restriction, including without limitation the rights to use, copy, modify, 531 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 532 | # permit persons to whom the Software is furnished to do so. 533 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 534 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 535 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 536 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 537 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 538 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
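# This handler is invoked by the EventBridge rule on Glue crawler state
# changes: it reads the data source ID from SSM, assumes the governance
# account role, and starts a DataZone data source run.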
539 | import boto3 540 | import os 541 | import logging 542 | 543 | logger = logging.getLogger() 544 | logger.setLevel(logging.INFO) 545 | 546 | def handler(event, context): 547 | 548 | ssm = boto3.client('ssm') 549 | data_source_id = ssm.get_parameter(Name=f'/datazone/{os.environ["STACK_NAME"]}/datasource_id')['Parameter']['Value'] 550 | 551 | # Create the STS client 552 | sts_client = boto3.client('sts') 553 | 554 | session_name = "AssumeRoleSession" 555 | role_arn = os.environ["ASSUME_ROLE_ARN"] 556 | 557 | # Assume the cross-account role 558 | assumed_role = sts_client.assume_role( 559 | RoleArn=role_arn, 560 | RoleSessionName=session_name 561 | ) 562 | 563 | # Extract temporary credentials 564 | credentials = assumed_role['Credentials'] 565 | 566 | # Use the assumed credentials to create a DataZone client 567 | datazone_client = boto3.client( 568 | 'datazone', 569 | aws_access_key_id=credentials['AccessKeyId'], 570 | aws_secret_access_key=credentials['SecretAccessKey'], 571 | aws_session_token=credentials['SessionToken'] 572 | ) 573 | 574 | domain_id = os.environ['DOMAIN_ID'] 575 | 576 | try: 577 | response = datazone_client.start_data_source_run( 578 | domainIdentifier=domain_id, 579 | dataSourceIdentifier=data_source_id 580 | ) 581 | logger.info(f"DataSource run started successfully") 582 | return { 583 | 'statusCode': 200, 584 | 'body': 'DataSource run started successfully' 585 | } 586 | except Exception as e: 587 | logger.error(f"Error starting DataSource run: {str(e)}") 588 | return { 589 | 'statusCode': 500, 590 | 'body': f'Error starting DataSource run: {str(e)}' 591 | } 592 | Runtime: python3.13 593 | Timeout: 60 594 | Environment: 595 | Variables: 596 | DOMAIN_ID: !Ref DataZoneDomainId 597 | ASSUME_ROLE_ARN: !Ref AssumeRoleArn 598 | STACK_NAME: !Ref AWS::StackName 599 | 600 | GlueCrawlerStateChangeRule: 601 | Type: AWS::Events::Rule 602 | Properties: 603 | Description: EventBridge rule that invokes the Lambda function on Glue crawler state changes. 604 | EventPattern: 605 | source: 606 | - aws.glue 607 | detail-type: 608 | - Glue Crawler State Change 609 | State: ENABLED 610 | Targets: 611 | - Arn: !GetAtt TriggerDataSourceRunFunction.Arn 612 | Id: TriggerDataSourceRunTarget 613 | 614 | LambdaPermissionForEventBridge: 615 | Type: AWS::Lambda::Permission 616 | Properties: 617 | FunctionName: !Ref TriggerDataSourceRunFunction 618 | Action: "lambda:InvokeFunction" 619 | Principal: "events.amazonaws.com" 620 | SourceArn: !GetAtt GlueCrawlerStateChangeRule.Arn 621 | -------------------------------------------------------------------------------- /mainaccount.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: CloudFormation template for the single-account (governance) EHDS reference architecture.
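# Governance account stack: provisions the S3 buckets, the Glue database and
# crawler with its trigger Lambda, Lake Formation settings, and the Amazon
# DataZone domain, project, environment, and data source described above.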
3 | Resources: 4 | DataLakeBucketName: 5 | Type: AWS::S3::Bucket 6 | Properties: 7 | BucketName: !Sub datazone-bucket-${AWS::AccountId}-${AWS::StackName} 8 | BucketEncryption: 9 | ServerSideEncryptionConfiguration: 10 | - ServerSideEncryptionByDefault: 11 | SSEAlgorithm: AES256 12 | 13 | CentralAccountAssetsBucket: 14 | Type: AWS::S3::Bucket 15 | DependsOn: 16 | - LambdaFunctionTriggerGlueCrawler 17 | - LambdaInvokePermission 18 | Properties: 19 | BucketName: !Sub ehds-assets-${AWS::AccountId}-${AWS::StackName} 20 | BucketEncryption: 21 | ServerSideEncryptionConfiguration: 22 | - ServerSideEncryptionByDefault: 23 | SSEAlgorithm: AES256 24 | NotificationConfiguration: 25 | LambdaConfigurations: 26 | - Event: s3:ObjectCreated:* 27 | Function: !GetAtt LambdaFunctionTriggerGlueCrawler.Arn 28 | 29 | LambdaFunctionTriggerGlueCrawler: 30 | Type: AWS::Lambda::Function 31 | Properties: 32 | Code: 33 | ZipFile: |- 34 | # MIT No Attribution 35 | # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 36 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 37 | # software and associated documentation files (the "Software"), to deal in the Software 38 | # without restriction, including without limitation the rights to use, copy, modify, 39 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 40 | # permit persons to whom the Software is furnished to do so. 41 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 42 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 43 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 44 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 45 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 46 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 47 | import boto3 48 | import os 49 | import time 50 | def lambda_handler(event, context): 51 | glue = boto3.client('glue') 52 | lambda_client = boto3.client('lambda') 53 | lakeformation_client = boto3.client('lakeformation') 54 | crawler_name = os.environ.get('CRAWLER_NAME') 55 | role_arn = os.environ.get('ROLE_ARN') 56 | database_name = os.environ.get('DATABASE_NAME') 57 | catalog_id = os.environ.get('CATALOG_ID') 58 | try: 59 | response = glue.start_crawler(Name=crawler_name) 60 | time.sleep(90) 61 | return { 62 | 'statusCode': 200, 63 | 'body': 'Glue crawler started successfully.', 64 | 'headers': { 65 | 'Access-Control-Allow-Origin': '*', 66 | 'Access-Control-Allow-Methods': 'OPTIONS, POST, GET, PUT, DELETE', 67 | 'Access-Control-Allow-Headers': 'Content-Type', 68 | } 69 | } 70 | except Exception as e: 71 | error_message = f"Error starting Glue crawler '{crawler_name}': {str(e)}" 72 | print(error_message) 73 | return { 74 | 'statusCode': 500, 75 | 'body': error_message 76 | } 77 | Environment: 78 | Variables: 79 | CATALOG_ID: !Ref AWS::AccountId 80 | DATABASE_NAME: !Ref LakeFormationDatabase 81 | ROLE_ARN: !GetAtt LambdaRole.Arn 82 | CRAWLER_NAME: !Sub datasetcrawler-${AWS::StackName} 83 | FunctionName: !Sub LambdaTriggerGlueCrawler-${AWS::StackName} 84 | Handler: index.lambda_handler 85 | Role: !GetAtt LambdaRole.Arn 86 | Runtime: python3.13 87 | Timeout: 180 88 | DependsOn: 89 | - LambdaRole 90 | 91 | LambdaRole: 92 | Type: AWS::IAM::Role 93 | Properties: 94 | AssumeRolePolicyDocument: 95 | Statement: 96 | - Action: sts:AssumeRole 97 | Effect: Allow 98 | Principal: 99 | Service: lambda.amazonaws.com 100 | Version: '2012-10-17' 101 | ManagedPolicyArns: 102 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole 103 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole 104 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaRole 105 | Policies: 106 | - PolicyName: S3AdminAccess 107 | PolicyDocument: 108 | Version: '2012-10-17' 109 | Statement: 110 | - Sid: S3BucketAccess 111 | Effect: Allow 112 | Action: 113 | - s3:CreateBucket 114 | - s3:DeleteBucket 115 | - s3:ListBucket 116 | - s3:GetBucketLocation 117 | - s3:GetBucketPolicy 118 | - s3:PutBucketPolicy 119 | - s3:DeleteBucketPolicy 120 | - s3:GetBucketAcl 121 | - s3:PutBucketAcl 122 | - s3:GetBucketVersioning 123 | - s3:PutBucketVersioning 124 | - s3:GetBucketPublicAccessBlock 125 | - s3:PutBucketPublicAccessBlock 126 | - s3:GetObject 127 | - s3:PutObject 128 | - s3:DeleteObject 129 | - s3:GetObjectVersion 130 | - s3:GetObjectAcl 131 | - s3:PutObjectAcl 132 | - s3:GetBucketEncryption 133 | - s3:PutBucketEncryption 134 | Resource: 135 | - !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName} 136 | - !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName}/* 137 | 138 | LambdaInvokePermission: 139 | Type: AWS::Lambda::Permission 140 | Properties: 141 | FunctionName: !Ref LambdaFunctionTriggerGlueCrawler 142 | Action: lambda:InvokeFunction 143 | Principal: s3.amazonaws.com 144 | SourceAccount: !Ref AWS::AccountId 145 | SourceArn: !Sub arn:aws:s3:::ehds-assets-${AWS::AccountId}-${AWS::StackName} 146 | 147 | LakeFormationSettings: 148 | DependsOn: DataZoneDomainExecutionRole 149 | Type: AWS::LakeFormation::DataLakeSettings 150 | Properties: 151 | Admins: 152 | - DataLakePrincipalIdentifier: !GetAtt LambdaRole.Arn 153 | - DataLakePrincipalIdentifier: !GetAtt GlueRole.Arn 154 | - DataLakePrincipalIdentifier: !GetAtt DataZoneDomainExecutionRole.Arn 155 | Parameters: 156 | CROSS_ACCOUNT_VERSION: '4' 157 | SET_CONTEXT: 'TRUE' 158 | 159 | GlueRole: 160 | Type: AWS::IAM::Role 161 | Properties: 162 | AssumeRolePolicyDocument: 163 | Statement: 164 | - Action: sts:AssumeRole 165 | Effect: Allow 166 | Principal: 167 | Service: 168 | - glue.amazonaws.com 169 | - lakeformation.amazonaws.com 170 | Version: '2012-10-17' 171 | 172 | ManagedPolicyArns: 173 | - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole 174 | Policies: 175 | - PolicyName: LakeFormationPermissions 176 | PolicyDocument: 177 | Version: '2012-10-17' 178 | Statement: 179 | - Effect: Allow 180 | Action: 181 | - lakeformation:GetDataAccess 182 | - lakeformation:GrantPermissions 183 | - lakeformation:RevokePermissions 184 | - lakeformation:ListPermissions 185 | - lakeformation:PutDataLakeSettings 186 | -
lakeformation:GetResourceLFTags 187 | - lakeformation:ListLFTags 188 | - lakeformation:GetLFTag 189 | - lakeformation:SearchTablesByLFTags 190 | - lakeformation:AddLFTagsToResource 191 | - lakeformation:DeleteLFTag 192 | - lakeformation:RemoveLFTagsFromResource 193 | - lakeformation:SearchDatabasesByLFTags 194 | - lakeformation:UpdateLFTag 195 | - iam:ListUsers 196 | - iam:ListRoles 197 | - iam:GetRole 198 | - iam:GetRolePolicy 199 | Resource: '*' 200 | - PolicyName: S3Access 201 | PolicyDocument: 202 | Version: '2012-10-17' 203 | Statement: 204 | - Sid: S3BucketAccess 205 | Effect: Allow 206 | Action: 207 | - s3:List* 208 | - s3:Get* 209 | Resource: !GetAtt CentralAccountAssetsBucket.Arn 210 | - Sid: S3ObjectAccess 211 | Effect: Allow 212 | Action: 213 | - s3:GetObject 214 | - s3:PutObject 215 | - s3:DeleteObject 216 | Resource: !Sub ${CentralAccountAssetsBucket.Arn}/* 217 | 218 | LakeFormationDatabase: 219 | Type: AWS::Glue::Database 220 | Properties: 221 | CatalogId: !Ref AWS::AccountId 222 | DatabaseInput: 223 | Name: !Sub ehds-assets-${AWS::StackName} 224 | 225 | LakeFormationDataLakeLocation: 226 | Type: AWS::LakeFormation::Resource 227 | DependsOn: GlueRole 228 | Properties: 229 | ResourceArn: !GetAtt CentralAccountAssetsBucket.Arn 230 | UseServiceLinkedRole: false 231 | HybridAccessEnabled: true 232 | RoleArn: !GetAtt GlueRole.Arn 233 | 234 | GlueLakeformationPermissions: 235 | Type: AWS::LakeFormation::Permissions 236 | Properties: 237 | DataLakePrincipal: 238 | DataLakePrincipalIdentifier: !GetAtt GlueRole.Arn 239 | Permissions: 240 | - ALL 241 | PermissionsWithGrantOption: 242 | - ALL 243 | Resource: 244 | DatabaseResource: 245 | CatalogId: !Ref AWS::AccountId 246 | Name: !Ref LakeFormationDatabase 247 | 248 | GlueCrawler: 249 | Type: AWS::Glue::Crawler 250 | Properties: 251 | DatabaseName: !Ref LakeFormationDatabase 252 | Name: !Sub datasetcrawler-${AWS::StackName} 253 | Role: !GetAtt GlueRole.Arn 254 | Targets: 255 | S3Targets: 256 | - Path: !Sub s3://${CentralAccountAssetsBucket} 257 | 258 | GlueCrawlerTrigger: 259 | Type: AWS::Glue::Trigger 260 | Properties: 261 | Actions: 262 | - CrawlerName: !Sub datasetcrawler-${AWS::StackName} 263 | Name: !Sub CrawlerTrigger-${AWS::StackName} 264 | Type: ON_DEMAND 265 | 266 | LakeformationLambdaPermissions: 267 | Type: AWS::LakeFormation::Permissions 268 | Properties: 269 | DataLakePrincipal: 270 | DataLakePrincipalIdentifier: !GetAtt LambdaRole.Arn 271 | Permissions: 272 | - ALL 273 | PermissionsWithGrantOption: 274 | - ALL 275 | Resource: 276 | DatabaseResource: 277 | CatalogId: !Ref AWS::AccountId 278 | Name: !Ref LakeFormationDatabase 279 | 280 | CentralAccountAssetsBucketPolicy: 281 | Type: AWS::S3::BucketPolicy 282 | Properties: 283 | Bucket: !Ref CentralAccountAssetsBucket 284 | PolicyDocument: 285 | Version: '2012-10-17' 286 | Statement: 287 | - Sid: BucketLevelPermissions 288 | Effect: Allow 289 | Principal: 290 | AWS: 291 | - !Sub arn:${AWS::Partition}:iam::${AWS::AccountId}:root 292 | Action: 293 | - s3:ListBucket 294 | - s3:GetBucketLocation 295 | - s3:PutBucketPolicy 296 | - s3:GetBucketPolicy 297 | Resource: !GetAtt CentralAccountAssetsBucket.Arn 298 | Condition: 299 | Bool: 300 | aws:SecureTransport: true 301 | - Sid: ObjectLevelPermissions 302 | Effect: Allow 303 | Principal: 304 | AWS: 305 | - !Sub arn:${AWS::Partition}:iam::${AWS::AccountId}:root 306 | Action: 307 | - s3:PutObject 308 | - s3:GetObject 309 | - s3:DeleteObject 310 | Resource: !Sub ${CentralAccountAssetsBucket.Arn}/* 311 | Condition: 312 | Bool: 313 | 
  DataZoneDomainExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - datazone.amazonaws.com
                - lakeformation.amazonaws.com
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
              - sts:TagSession
      ManagedPolicyArns:
        - !Sub arn:${AWS::Partition}:iam::aws:policy/AmazonDataZoneFullAccess
        - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
        - !Sub arn:${AWS::Partition}:iam::aws:policy/AmazonDataZoneRedshiftGlueProvisioningPolicy
        - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonDataZoneGlueManageAccessRolePolicy

  DataZoneDomain:
    Type: AWS::DataZone::Domain
    DependsOn: DataLakeBucketName
    Properties:
      Name: !Sub DataZoneDomain-${AWS::StackName}
      Description: DataZone domain for healthcare data
      DomainExecutionRole: !GetAtt DataZoneDomainExecutionRole.Arn

  LabEnvironmentBlueprintConfig:
    Type: AWS::DataZone::EnvironmentBlueprintConfiguration
    DependsOn:
      - DataZoneDomain
    Properties:
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      EnabledRegions:
        - !Ref AWS::Region
      EnvironmentBlueprintIdentifier: DefaultDataLake
      ManageAccessRoleArn: !GetAtt DataZoneDomainExecutionRole.Arn
      ProvisioningRoleArn: !GetAtt DataZoneDomainExecutionRole.Arn
      RegionalParameters:
        - Region: !Ref AWS::Region
          Parameters:
            DataLakeAccountId: !Ref AWS::AccountId
            DataLakeRegion: !Ref AWS::Region
            S3Location: !Sub s3://${DataLakeBucketName}

  LabProject:
    Type: AWS::DataZone::Project
    DependsOn: LabEnvironmentBlueprintConfig
    Properties:
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      Name: EHDS-project
      Description: Project for EHDS data

  LabEnvironmentProfile:
    Type: AWS::DataZone::EnvironmentProfile
    DependsOn: LabProject
    Properties:
      Name: ehds-environment-profile
      Description: Environment profile for the EHDS project
      EnvironmentBlueprintIdentifier: !GetAtt LabEnvironmentBlueprintConfig.EnvironmentBlueprintId
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      AwsAccountId: !Ref AWS::AccountId
      AwsAccountRegion: !Ref AWS::Region
      ProjectIdentifier: !GetAtt LabProject.Id

  LabEnvironment:
    Type: AWS::DataZone::Environment
    DependsOn: LabEnvironmentProfile
    Properties:
      Name: EHDSEnvironment
      Description: Environment for the EHDS project
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      ProjectIdentifier: !GetAtt LabProject.Id
      EnvironmentProfileIdentifier: !GetAtt LabEnvironmentProfile.Id

  ProjectMemberExecutionRole:
    DependsOn:
      - LabProject
    Type: AWS::DataZone::ProjectMembership
    Properties:
      Designation: PROJECT_CONTRIBUTOR
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      Member:
        UserIdentifier: !GetAtt DataZoneDomainExecutionRole.Arn
      ProjectIdentifier: !GetAtt LabProject.Id

  DataSource:
    DependsOn:
      - LabProject
      - DataZoneDomain
      - LabEnvironmentBlueprintConfig
      - LabEnvironmentProfile
      - LabEnvironment
      - LakeFormationDatabase
      - GlueCrawler
    Type: AWS::DataZone::DataSource
    Properties:
      Configuration:
        GlueRunConfiguration:
          RelationalFilterConfigurations:
            - DatabaseName: !Ref LakeFormationDatabase
      DomainIdentifier: !GetAtt DataZoneDomain.Id
      EnableSetting: ENABLED
      EnvironmentIdentifier: !GetAtt LabEnvironment.Id
      Name: ehds-assets
      ProjectIdentifier: !GetAtt LabProject.Id
      PublishOnImport: true
      Type: GLUE
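  # Helper Lambda that starts a run of the DataZone data source so newly
  # crawled tables are published to the catalog. It is invoked by the
  # EventBridge rule defined further below.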
  TriggerDataSourceRunFunction:
    Type: AWS::Lambda::Function
    Properties:
      Handler: index.handler
      Role: !GetAtt DataZoneDomainExecutionRole.Arn
      Code:
        ZipFile: |
          # MIT No Attribution
          # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
          # Permission is hereby granted, free of charge, to any person obtaining a copy of this
          # software and associated documentation files (the "Software"), to deal in the Software
          # without restriction, including without limitation the rights to use, copy, modify,
          # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
          # permit persons to whom the Software is furnished to do so.
          # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
          # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
          # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
          # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
          # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
          # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
          import boto3
          import os
          import logging

          logger = logging.getLogger()
          logger.setLevel(logging.INFO)

          def handler(event, context):
              client = boto3.client('datazone')
              domain_id = os.environ['DOMAIN_ID']
              data_source_id = os.environ['DATA_SOURCE_ID']

              try:
                  response = client.start_data_source_run(
                      domainIdentifier=domain_id,
                      dataSourceIdentifier=data_source_id
                  )
                  logger.info("DataSource run started successfully")
                  return {
                      'statusCode': 200,
                      'body': 'DataSource run started successfully'
                  }
              except Exception as e:
                  logger.error(f"Error starting DataSource run: {str(e)}")
                  return {
                      'statusCode': 500,
                      'body': f'Error starting DataSource run: {str(e)}'
                  }
      Runtime: python3.13
      Timeout: 60
      Environment:
        Variables:
          DOMAIN_ID: !GetAtt DataZoneDomain.Id
          DATA_SOURCE_ID: !GetAtt DataSource.Id
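  # EventBridge rule that fires on Glue crawler state changes. The pattern
  # matches every crawler state change in the account (Started, Succeeded,
  # Failed); adding a detail filter on the crawler name and state would avoid
  # unnecessary invocations. Note also that TriggerDataSourceRunRole below is
  # scoped for this Lambda, but TriggerDataSourceRunFunction above is
  # configured with DataZoneDomainExecutionRole instead.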
  GlueCrawlerStateChangeRule:
    Type: AWS::Events::Rule
    Properties:
      Description: Invoke the data source run Lambda when the Glue crawler changes state.
      EventPattern:
        source:
          - aws.glue
        detail-type:
          - Glue Crawler State Change
      State: ENABLED
      Targets:
        - Arn: !GetAtt TriggerDataSourceRunFunction.Arn
          Id: TriggerDataSourceRunTarget

  TriggerDataSourceRunRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
      Policies:
        - PolicyName: DataZoneSpecificAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - datazone:StartDataSourceRun
                  - datazone:GetDataSource
                  - datazone:ListDataSources
                Resource: !Sub 'arn:aws:datazone:${AWS::Region}:${AWS::AccountId}:domain/${DataZoneDomain.Id}/*'
        - PolicyName: PassRoleToDataZone
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - iam:PassRole
                Resource:
                  - !GetAtt DataZoneDomainExecutionRole.Arn
                Condition:
                  StringEquals:
                    iam:PassedToService: datazone.amazonaws.com

  LambdaPermissionForEventBridge:
    Type: AWS::Lambda::Permission
    Properties:
      FunctionName: !Ref TriggerDataSourceRunFunction
      Action: lambda:InvokeFunction
      Principal: events.amazonaws.com
      SourceArn: !GetAtt GlueCrawlerStateChangeRule.Arn

  DataZoneAutoApprovalFunction:
    Type: AWS::Lambda::Function
    Properties:
      Handler: index.lambda_handler
      Role: !GetAtt DataZoneDomainExecutionRole.Arn
      FunctionName: datazone-auto-approval
      Code:
        ZipFile: |
          # MIT No Attribution
          # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
          # Permission is hereby granted, free of charge, to any person obtaining a copy of this
          # software and associated documentation files (the "Software"), to deal in the Software
          # without restriction, including without limitation the rights to use, copy, modify,
          # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
          # permit persons to whom the Software is furnished to do so.
          # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
          # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
          # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
          # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
          # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
          # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
          import boto3
          from botocore.exceptions import ClientError
          import json
          import logging

          logger = logging.getLogger()
          logger.setLevel(logging.INFO)

          def lambda_handler(event, context):
              logger.info(f"Received event: {json.dumps(event)}")

              try:
                  domain_id = event['detail']['metadata']['domain']
                  subscription_id = event['detail']['metadata']['id']
                  subscribed_listing = event['detail']['data']['subscribedListings'][0]
                  subscribed_principal = event['detail']['data']['subscribedPrincipals'][0]

                  listing_id = subscribed_listing['id']
                  principal_id = subscribed_principal['id']

                  logger.info(f"Subscribed Listing ID: {listing_id}")
                  logger.info(f"Subscribed Principal ID: {principal_id}")

                  datazone = boto3.client('datazone')
                  response = datazone.accept_subscription_request(
                      decisionComment='Auto Approved by Lambda',
                      domainIdentifier=domain_id,
                      identifier=subscription_id
                  )
                  # default=str: the response contains datetime values that
                  # json.dumps cannot serialize on its own
                  logger.info(f"Subscription accepted: {json.dumps(response, default=str)}")

                  return {
                      'statusCode': 200,
                      'body': json.dumps('Subscription request approved successfully')
                  }
              except KeyError as e:
                  logger.error(f"KeyError: {str(e)} - some expected keys were missing in the event data")
                  return {
                      'statusCode': 400,
                      'body': json.dumps('Error: Invalid event data structure')
                  }
              except IndexError as e:
                  logger.error(f"IndexError: {str(e)} - expected lists are empty")
                  return {
                      'statusCode': 400,
                      'body': json.dumps('Error: Missing subscription data')
                  }
              except ClientError as e:
                  logger.error(f"ClientError: {str(e)}")
                  return {
                      'statusCode': 500,
                      'body': json.dumps('Error: Failed to approve subscription request')
                  }
              except Exception as e:
                  logger.error(f"Unexpected error: {str(e)}")
                  return {
                      'statusCode': 500,
                      'body': json.dumps('Error: Unexpected error occurred')
                  }
      Runtime: python3.13
      Timeout: 300
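  # Forwards DataZone "Subscription Request Created" events to the
  # auto-approval Lambda, so every subscription request in the domain is
  # approved automatically. This is demo behavior; remove this rule (or the
  # auto-approval function) wherever a human review step is required.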
  DataZoneEventRule:
    Type: AWS::Events::Rule
    Properties:
      Name: datazone-subscription-event
      EventPattern:
        source:
          - aws.datazone
        detail-type:
          - "Subscription Request Created"
      State: ENABLED
      Description: Forwards DataZone subscription request events to the auto-approval Lambda.
      EventBusName: default
      Targets:
        - Id: datazoneAutoApprovalTarget
          Arn: !GetAtt DataZoneAutoApprovalFunction.Arn

  LambdaInvokePermissionDataZoneAutoApprove:
    Type: AWS::Lambda::Permission
    Properties:
      FunctionName: !Ref DataZoneAutoApprovalFunction
      Action: lambda:InvokeFunction
      Principal: events.amazonaws.com
      SourceArn: !GetAtt DataZoneEventRule.Arn

Outputs:
  DomainId:
    Description: The ID of the created DataZone Domain
    Value: !Ref DataZoneDomain
  DomainExecutionRoleArn:
    Description: The ARN of the created DataZone Domain Execution Role
    Value: !GetAtt DataZoneDomainExecutionRole.Arn
--------------------------------------------------------------------------------