├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DE-diagram.png ├── LICENSE ├── README.md ├── logo.png ├── providers ├── python │ ├── add-amazon-redshift-dataset │ │ ├── README.md │ │ ├── add-amazon-redshift-dataset.py │ │ ├── datashare_arn.png │ │ └── requirements.txt │ ├── add-amazon-s3-data-access │ │ ├── README.md │ │ ├── add-amazon-s3-access.py │ │ └── requirements.txt │ ├── add-amazon-s3-data-files │ │ ├── README.md │ │ ├── add-amazon-s3-files.py │ │ └── requirements.txt │ ├── create-amazon-s3-files-data-grant │ │ ├── README.md │ │ ├── create-data-grant.py │ │ └── requirements.txt │ └── revoke-revisions-from-published-dataset │ │ ├── README.md │ │ ├── requirements.txt │ │ └── revision_pruning.py ├── ruby │ ├── add-data-set │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── README.md │ │ └── add-data-set.rb │ ├── add-revision-to-a-data-set │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── README.md │ │ └── add-revision-to-a-data-set.rb │ ├── create-data-set-with-finalized-revision │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── README.md │ │ └── create-data-set-with-finalized-revision.rb │ └── enumerate-data-products │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── README.md │ │ └── enumerate-data-products.rb └── usage-metrics │ ├── README.md │ ├── img │ └── architecture.png │ └── source │ ├── adx_metrics_infrastructure.yaml │ └── adx_metrics_processing.py └── subscribers ├── cpp └── all-entitled-datasets │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── dotnet ├── .gitignore └── all-entitled-datasets │ ├── AwsDataExchangeSample.csproj │ ├── Program.cs │ └── README.md ├── go ├── adx-for-apis-simple │ ├── README.md │ ├── go.mod │ ├── go.sum │ └── main.go └── all-entitled-datasets │ ├── README.md │ └── all-entitled-datasets.go ├── java ├── .gitignore └── all-entitled-datasets │ ├── README.md │ ├── pom.xml │ └── src │ └── main │ └── java │ └── com │ └── amazonaws │ └── dataexchange │ └── App.java ├── javascript ├── adx-for-apis-simple │ ├── .gitignore │ ├── README.md │ ├── adx4api.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json ├── all-entitled-datasets │ ├── .gitignore │ ├── .npmrc │ ├── README.md │ ├── index.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json └── auto-export-to-s3 │ ├── .gitignore │ ├── README.md │ ├── cdk.json │ ├── cfn │ └── lambda.ts │ ├── lambda │ └── exportToS3.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json ├── php └── all-entitled-datasets │ ├── .gitignore │ ├── README.md │ ├── all-entitled-datasets.php │ ├── composer.json │ └── composer.lock ├── python ├── adx-for-apis-simple │ ├── README.md │ ├── adx4api.py │ ├── pylintrc │ └── requirements.txt ├── download-entitled-assets │ ├── README.md │ ├── download-entitled-assets.py │ └── requirements.txt ├── export-data-sets │ ├── README.md │ ├── export-data-sets.py │ └── requirements.txt ├── pandas-describe-csv │ ├── README.md │ ├── pandas-describe-csv.py │ └── requirements.txt ├── setup-redshift-data-shares │ ├── README.md │ ├── requirements.txt │ └── setup-redshift-data-shares.py └── tf-auto-export-to-s3 │ ├── README.md │ ├── adx-example.tf │ ├── build.sh │ ├── dataexchange │ └── 2017-07-25 │ │ ├── paginators-1.json │ │ └── service-2.json │ └── index.py └── ruby ├── adx-for-apis-simple ├── Gemfile ├── Gemfile.lock ├── README.md └── send_api_asset.rb ├── all-entitled-datasets ├── Gemfile ├── Gemfile.lock ├── README.md └── all-entitled-datasets.rb └── most-expensive-neighborhoods-in-nyc ├── Gemfile ├── Gemfile.lock ├── README.md └── 
most-expensive-neighborhoods-in-nyc.rb /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 
45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /DE-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-dataexchange-api-samples/eb511854eb9743874f3679967dd04c9a2ae5c7bc/DE-diagram.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Data Exchange logo](logo.png) 2 | 3 | # AWS Data Exchange Samples 4 | 5 | [AWS Data Exchange](https://aws.amazon.com/data-exchange/) is a service that makes it easy for millions of AWS customers to securely find, subscribe to, and use third-party data in the cloud. This repository contains a collection of samples that interact with the AWS Data Exchange API. 6 | 7 | 8 | ## Subscriber Samples 9 | 10 | Examples of interacting with the AWS Data Exchange API from the data subscriber side can be found in [/subscribers](subscribers). 11 | 12 | ### Java 13 | 14 | * [all-entitled-datasets](subscribers/java/all-entitled-datasets): Lists all data sets one is subscribed to. 15 | 16 | ### Ruby 17 | 18 | * [all-entitled-datasets](subscribers/ruby/all-entitled-datasets): Lists all data sets one is subscribed to. 
19 | * [most-expensive-neighborhoods-in-nyc](subscribers/ruby/most-expensive-neighborhoods-in-nyc): Most expensive neighborhoods in NYC by median price. 20 | 21 | ### JavaScript 22 | 23 | * [all-entitled-datasets](subscribers/javascript/all-entitled-datasets): Lists all data sets one is subscribed to. 24 | * [auto-export-to-s3](subscribers/javascript/auto-export-to-s3): Automatically exports newly published revisions to S3 using a CloudWatch Rule and Lambda Function. 25 | 26 | ### Go 27 | 28 | * [all-entitled-datasets](subscribers/go/all-entitled-datasets): Lists all data sets one is subscribed to. 29 | 30 | ### Python 31 | 32 | * [export-data-sets](subscribers/python/export-data-sets): Exports data sets specified into an Amazon S3 bucket. 33 | * [download-entitled-assets](subscribers/python/download-entitled-assets): Download all assets to which you've subscribed. 34 | * [pandas-describe-csv](subscribers/python/pandas-describe-csv): Download a CSV asset by Arn, import it into a Pandas data frame, and `describe()` the result. 35 | * [auto-export-to-s3](subscribers/python/tf-auto-export-to-s3): Terraform based deployment to automatically export newly published revisions to S3 using a CloudWatch Rule and Lambda. 36 | * [setup-redshift-data-shares](subscribers/python/setup-redshift-data-shares): Sets up AWS Data Exchange datashares for querying in Amazon Redshift. 37 | 38 | ### DotNet 39 | 40 | * [all-entitled-datasets](subscribers/dotnet/all-entitled-datasets): Lists all data sets one is subscribed to. 41 | 42 | ### C++ 43 | 44 | * [all-entitled-datasets](subscribers/cpp/all-entitled-datasets): Lists all data sets one is subscribed to. 45 | 46 | ### PHP 47 | 48 | * [all-entitled-datasets](subscribers/php/all-entitled-datasets): Lists all data sets one is subscribed to. 49 | 50 | ## Provider Samples 51 | 52 | Examples of interacting with the AWS Data Exchange API from the data provider side can be found in [/providers](providers). 53 | 54 | ### Usage Metrics 55 | * [Monitor Subscriber usage of AWS Data Exchange For Amazon S3 data sets](providers/usage-metrics): Deploy infrastructure to report and visualize subscriber usage on AWS Data Exchange for Amazon S3 data sets. 56 | 57 | ### Python 58 | * [create-data-set-with-amazon-s3-access-points](providers/python/add-amazon-s3-data-access): Create a data set that will contain an Amazon S3 Access Point Asset. 59 | 60 | ### Ruby 61 | 62 | * [create-data-set-with-finalized-revision](providers/ruby/create-data-set-with-finalized-revision): Create a data set with a finalized revision. 63 | * [enumerate-data-products](providers/ruby/enumerate-data-products): Enumerate data products, examine each product's data sets, and fetch a data set. 64 | * [add-data-set](providers/ruby/add-data-set): Create and publish a data set into an existing product. 65 | * [add-revision-to-a-data-set](providers/ruby/add-revision-to-a-data-set): Add a new revision to a data set using data in S3. 66 | 67 | ## Other Samples 68 | 69 | * [awslabs/aws-data-exchange-publisher-coordinator](https://github.com/awslabs/aws-data-exchange-publisher-coordinator): Coordinate the publishing steps for a dataset revision based on an S3 manifest file being uploaded to the specified S3 bucket. 
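The subscriber and provider samples above all drive the same AWS Data Exchange API through the AWS SDKs. As a quick orientation (a minimal sketch, not a substitute for the samples; it assumes credentials and a region are already configured in your environment), listing the data sets you are entitled to with boto3 looks roughly like this:

```python
import boto3

# Credentials and region are taken from the environment
# (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, AWS_REGION).
dataexchange = boto3.client('dataexchange')

# Page through the data sets you are entitled to as a subscriber;
# use Origin='OWNED' to list the data sets you provide instead.
paginator = dataexchange.get_paginator('list_data_sets')
for page in paginator.paginate(Origin='ENTITLED'):
    for data_set in page['DataSets']:
        print(data_set['Id'], data_set['Name'])
```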
70 | 71 | ## API References 72 | 73 | * [AWS Data Exchange API](https://docs.aws.amazon.com/data-exchange/latest/apireference/welcome.html) 74 | * [AWS Marketplace Catalog API](https://docs.aws.amazon.com/marketplace-catalog/latest/api-reference/welcome.html) 75 | 76 | ## License 77 | 78 | This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file. 79 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-dataexchange-api-samples/eb511854eb9743874f3679967dd04c9a2ae5c7bc/logo.png -------------------------------------------------------------------------------- /providers/python/add-amazon-redshift-dataset/README.md: -------------------------------------------------------------------------------- 1 | # Add Amazon Redshift Datashare Dataset (Python) 2 | 3 | This example will create a data set for an Amazon Redshift Datashare. The data set will contain an 4 | Amazon Redshift Datashare, which enables subscribers to have read-only access to shared tables and views added to the Datashare. 5 | 6 | ### Setup 7 | 8 | Install the requirements: 9 | 10 | ```bash 11 | $ pip3 install -r requirements.txt 12 | ``` 13 | 14 | Set the AWS access key and secret environment variables: 15 | 16 | ``` 17 | $ export AWS_ACCESS_KEY_ID= 18 | $ export AWS_SECRET_ACCESS_KEY= 19 | $ export AWS_SESSION_TOKEN= 20 | ``` 21 | 22 | 23 | The user needs the **AWSDataExchangeProviderFullAccess** IAM policy associated with your role/account. Find out more 24 | about IAM policies on AWS Data Exchange [here](https://docs.aws.amazon.com/data-exchange/latest/userguide/auth-access.html). 25 | 26 | The user should create an Amazon Redshift Datashare managed by ADX using Amazon Redshift serverless or RA3 provisioned clusters. The following are template 27 | queries that can be executed from your Redshift Query editor. You can learn more about how to create datashares managed 28 | by AWS Data Exchange [here](https://docs.aws.amazon.com/redshift/latest/dg/adx-getting-started-producer.html). 29 | ``` 30 | CREATE DATASHARE [datashare_name] MANAGEDBY ADX; 31 | ALTER DATASHARE [datashare_name] ADD SCHEMA [schema] 32 | ALTER DATASHARE [datashare_name] ADD TABLE [table_name] 33 | ALTER DATASHARE [datashare_name] ADD FUNCTION [function_name] 34 | ``` 35 | 36 | You can then note down the Datashare ARN from the console by choosing the Datashare you created. 37 | 38 | ![copy_datashare_arn](datashare_arn.png) 39 | 40 | ### Example Usage 41 | 42 | Get usage help: `python3 add-amazon-redshift-dataset.py --help` 43 | 44 | Share an Amazon Redshift Datashare: `python3 add-amazon-redshift-dataset.py --data-set-name 'programmatic-redshift-example' --datashare-arn 'arn:aws:redshift:us-east-1:123456789012:datashare:9aa6adfe-0q13-1234-a2cd-ef1234dd908c/demo' --region us-east-1` 45 | 46 | **Note**: You may specify a `data-set-id` parameter to add an Amazon Redshift Datashare to an existing data set. Any existing 47 | datashares will be replaced.
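If you want to add the Datashare to an existing data set instead of creating a new one, you need that data set's ID for the `data-set-id` parameter. A minimal sketch for looking it up by name with boto3 (the helper name is illustrative and not part of this sample):

```python
import boto3

def find_data_set_id(name, region='us-east-1'):
    """Return the ID of an owned AWS Data Exchange data set with the given name, or None."""
    dataexchange = boto3.client('dataexchange', region_name=region)
    paginator = dataexchange.get_paginator('list_data_sets')
    for page in paginator.paginate(Origin='OWNED'):
        for data_set in page['DataSets']:
            if data_set['Name'] == name:
                return data_set['Id']
    return None

# 'programmatic-redshift-example' is the example data set name used above.
print(find_data_set_id('programmatic-redshift-example'))
```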
48 | -------------------------------------------------------------------------------- /providers/python/add-amazon-redshift-dataset/add-amazon-redshift-dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import boto3 3 | import click 4 | import time 5 | 6 | 7 | def create_data_set(dataexchange, data_set_name): 8 | data_set_creation_response = dataexchange.create_data_set( 9 | AssetType='REDSHIFT_DATA_SHARE', 10 | Name=data_set_name, 11 | Description='Redshift DataShare data set' 12 | ) 13 | return data_set_creation_response['Id'] 14 | 15 | 16 | def add_shares(dataexchange, data_set_id, revision_id, datashare_arn): 17 | create_job_details = { 18 | 'ImportAssetsFromRedshiftDataShares': { 19 | 'AssetSources': [ 20 | { 21 | 'DataShareArn': datashare_arn 22 | }, 23 | ], 24 | 'DataSetId': data_set_id, 25 | 'RevisionId': revision_id 26 | } 27 | } 28 | 29 | create_job_response = dataexchange.create_job( 30 | Details=create_job_details, 31 | Type='IMPORT_ASSETS_FROM_REDSHIFT_DATA_SHARES' 32 | ) 33 | 34 | job_id = create_job_response['Id'] 35 | job_state = create_job_response['State'] 36 | 37 | if job_state == 'ERROR': 38 | raise click.ClickException(f'Data set creation failed with status {job_state}!') 39 | 40 | return job_id 41 | 42 | 43 | def wait_for_job_to_complete(dataexchange, job_id): 44 | dataexchange.start_job(JobId=job_id) 45 | 46 | while True: 47 | job_status_response = dataexchange.get_job(JobId=job_id) 48 | job_state = job_status_response['State'] 49 | 50 | if job_state in ['ABORTED', 'FAILED', 'ERROR']: 51 | print(job_status_response) 52 | raise click.ClickException(f'Data set creation failed with status {job_state}!') 53 | if job_state == 'COMPLETED': 54 | return 55 | 56 | click.echo(f'Still waiting for job {job_id} to finish.') 57 | time.sleep(5) 58 | 59 | 60 | @click.command() 61 | @click.option('--data-set-name', required=True, help='Name of the AWS Data Exchange data set to create.') 62 | @click.option('--datashare-arn', required=True, 63 | help='Amazon Redshift Datashare ARN that contains data to share.') 64 | @click.option('--data-set-id', 65 | help='If supplied, the data set ID to which the datashare will be added. Existing datashares will be replaced.') 66 | @click.option('--region', default='us-east-1', 67 | help='AWS Region of the Amazon Redshift Datashare , and where the data set will be. 
Default value is us-east-1') 68 | def main(data_set_name, data_set_id, region, datashare_arn): 69 | 70 | dataexchange = boto3.client('dataexchange', region_name=region) 71 | 72 | data_set_id_to_use = create_data_set(dataexchange, data_set_name) if data_set_id is None else data_set_id 73 | 74 | create_revision_response = dataexchange.create_revision(DataSetId=data_set_id_to_use) 75 | revision_id = create_revision_response['Id'] 76 | 77 | job_id = add_shares(dataexchange, data_set_id_to_use, revision_id, datashare_arn) 78 | wait_for_job_to_complete(dataexchange, job_id) 79 | finalize_revision_response = dataexchange.update_revision(DataSetId=data_set_id_to_use, RevisionId=revision_id, 80 | Finalized=True) 81 | 82 | click.echo(f'Data set {data_set_id_to_use} configured with Amazon Redshift Datashare.') 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /providers/python/add-amazon-redshift-dataset/datashare_arn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-dataexchange-api-samples/eb511854eb9743874f3679967dd04c9a2ae5c7bc/providers/python/add-amazon-redshift-dataset/datashare_arn.png -------------------------------------------------------------------------------- /providers/python/add-amazon-redshift-dataset/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.26.92 2 | botocore>=1.26.92 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-access/README.md: -------------------------------------------------------------------------------- 1 | # Add Amazon S3 Data Access (Python) 2 | 3 | This example will create a data set for AWS Data Exchange for Amazon S3. The data set will contain an 4 | Amazon S3 Access Point, which enables subscribers to have read-only access to the shared locations specified. Shared 5 | locations can be a combination of Amazon S3 prefixes and specific keys, or an entire Amazon S3 bucket. 6 | 7 | ### Setup 8 | 9 | Install the requirements: 10 | 11 | ```bash 12 | $ pip3 install -r requirements.txt 13 | ``` 14 | 15 | Set the AWS access key and secret environment variables: 16 | 17 | ``` 18 | $ export AWS_ACCESS_KEY_ID= 19 | $ export AWS_SECRET_ACCESS_KEY= 20 | $ export AWS_SESSION_TOKEN= 21 | ``` 22 | 23 | The user needs the **AWSDataExchangeProviderFullAccess** IAM policy associated with your role/account. Find out more 24 | about IAM policies on AWS Data Exchange [here](https://docs.aws.amazon.com/data-exchange/latest/userguide/auth-access.html). 25 | 26 | Ensure that the S3 buckets hosting the data have encryption disabled or are encrypted with Amazon S3-managed keys (SSE-S3) or 27 | customer-managed keys stored in AWS Key Management Service (AWS KMS). If you are using customer-managed keys, you must have IAM permissions 28 | to `kms:CreateGrant` on the KMS keys. These permissions can be granted through the key policy, IAM credentials, or through an AWS KMS grant on the KMS key. 29 | For more information on this, see [prerequisites](https://docs.aws.amazon.com/data-exchange/latest/userguide/publishing-products.html#publish-s3-data-access-product). 30 | 31 | The target Amazon S3 bucket also must have the bucket owner enforced setting applied.
Attach the following bucket policy to grant AWS Data Exchange permissions to 32 | correctly manage S3 Access Points on your behalf, replacing `<Bucket ARN>` with the ARN of the target Amazon S3 33 | bucket: 34 | 35 | ``` 36 | { 37 | "Version": "2012-10-17", 38 | "Statement": [ 39 | { 40 | "Effect": "Allow", 41 | "Principal": { 42 | "AWS": "*" 43 | }, 44 | "Action": [ 45 | "s3:GetObject", 46 | "s3:ListBucket" 47 | ], 48 | "Resource": [ 49 | "<Bucket ARN>", 50 | "<Bucket ARN>/*" 51 | ], 52 | "Condition": { 53 | "StringEquals": { 54 | "s3:DataAccessPointAccount": [ 55 | "337040091392", 56 | "504002150500", 57 | "366362662752", 58 | "330489627928", 59 | "291973504423", 60 | "461002523379", 61 | "036905324694", 62 | "540564263739", 63 | "675969394711", 64 | "108584782536", 65 | "844053218156" 66 | ] 67 | } 68 | } 69 | } 70 | ] 71 | } 72 | ``` 73 | 74 | ### Example Usage 75 | 76 | Get usage help: `python3 add-amazon-s3-access.py --help` 77 | 78 | Share an Amazon S3 bucket: `python3 add-amazon-s3-access.py --data-set-name 'publisher-script-example' --bucket 'example-source-bucket' --region us-east-1` 79 | 80 | Share prefixes and keys within an Amazon S3 bucket: `python3 add-amazon-s3-access.py --data-set-name 'publisher-script-example' --bucket 'example-source-bucket' --region 'us-east-1' --key 'createJob.png' --prefix 'Folder1' --prefix 'Folder2'` 81 | 82 | Share prefixes and keys within an Amazon S3 bucket encrypted with customer-managed KMS: `python3 add-amazon-s3-access.py --data-set-name 'publisher-script-example-kms' --bucket 'example-source-bucket-kms' --region 'us-east-1' --key 'createJob.png' --prefix 'Folder1' --prefix 'Folder2' --kms-key-arn 'arn:aws:kms:us-east-1:123456789:key/abc-def-ghi-jkl-mno' --kms-key-arn 'arn:aws:kms:us-east-1:234567891:key/def-ghi-jkl-mno-abc'` 83 | 84 | **Note**: You may specify a `data-set-id` parameter to add an S3 data access to an existing data set. Any existing 85 | data access will be replaced.
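Before running the script, you can confirm that the source bucket has the bucket owner enforced setting required above. A small pre-flight sketch (the bucket name is a placeholder; this check is not part of the sample script):

```python
import boto3
from botocore.exceptions import ClientError

def bucket_owner_enforced(bucket_name):
    """Return True if the bucket's object ownership is set to BucketOwnerEnforced."""
    s3 = boto3.client('s3')
    try:
        controls = s3.get_bucket_ownership_controls(Bucket=bucket_name)
    except ClientError:
        # No ownership controls are configured on the bucket at all.
        return False
    rules = controls['OwnershipControls']['Rules']
    return any(rule.get('ObjectOwnership') == 'BucketOwnerEnforced' for rule in rules)

# 'example-source-bucket' is the placeholder bucket name used in the examples above.
if not bucket_owner_enforced('example-source-bucket'):
    raise SystemExit('Enable the bucket owner enforced setting before sharing this bucket.')
```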
86 | -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-access/add-amazon-s3-access.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import boto3 3 | import click 4 | import time 5 | 6 | def create_data_set(dataexchange, data_set_name): 7 | data_set_creation_response = dataexchange.create_data_set( 8 | AssetType='S3_DATA_ACCESS', 9 | Name=data_set_name, 10 | Description='S3 Data Access data set' 11 | ) 12 | return data_set_creation_response['Id'] 13 | 14 | def add_shares(dataexchange, data_set_id, revision_id, bucket, prefix, key, kms_keys): 15 | # Ensure that all prefixes end with a trailing slash; otherwise, these would not be resolved by Amazon S3 16 | def format_prefix(p): 17 | if p.endswith('/'): 18 | return p 19 | return p + '/' 20 | 21 | prefixes = list(map(format_prefix, prefix)) 22 | 23 | def wrap_kms_key(k): 24 | return { 25 | "KmsKeyArn": k 26 | } 27 | 28 | create_job_details = { 29 | 'CreateS3DataAccessFromS3Bucket': { 30 | 'AssetSource': { 31 | 'Bucket': bucket, 32 | 'KeyPrefixes': prefixes, 33 | 'Keys': key 34 | }, 35 | 'DataSetId': data_set_id, 36 | 'RevisionId': revision_id 37 | } 38 | } 39 | 40 | if (len(kms_keys) > 0): 41 | create_job_details['CreateS3DataAccessFromS3Bucket']['AssetSource']['KmsKeysToGrant'] = list(map(wrap_kms_key, kms_keys)) 42 | 43 | create_job_response = dataexchange.create_job( 44 | Details=create_job_details, 45 | Type='CREATE_S3_DATA_ACCESS_FROM_S3_BUCKET' 46 | ) 47 | 48 | job_id = create_job_response['Id'] 49 | job_state = create_job_response['State'] 50 | 51 | if (job_state == 'ERROR'): 52 | raise click.ClickException(f'Data set creation failed with status {job_state}!') 53 | 54 | return job_id 55 | 56 | def wait_for_job_to_complete(dataexchange, job_id): 57 | dataexchange.start_job(JobId=job_id) 58 | 59 | while True: 60 | job_status_response = dataexchange.get_job(JobId=job_id) 61 | job_state = job_status_response['State'] 62 | 63 | if job_state in ['ABORTED', 'FAILED', 'ERROR']: 64 | raise click.ClickException(f'Data set creation failed with status {job_state}!') 65 | if job_state == 'COMPLETED': 66 | return 67 | 68 | click.echo(f'Still waiting for job {job_id} to finish.') 69 | time.sleep(5) 70 | 71 | 72 | @click.command() 73 | @click.option('--data-set-name', required=True, help='Name of the AWS Data Exchange data set to create.') 74 | @click.option('--bucket', required=True, help='Name of the Amazon S3 bucket that contains the prefixes and keys to share.') 75 | @click.option('--data-set-id', help='If supplied, the data set ID to which the data access will be added. Existing access will be replaced.') 76 | @click.option('--region', default='us-east-1', help='AWS Region of the Amazon S3 bucket, and where the data set will be.') 77 | @click.option('--prefix', default=[], help='Prefix of an Amazon S3 location to share. Multiple values permitted.', multiple=True) 78 | @click.option('--key', default=[], help='Key of an Amazon S3 object to share. Multiple values permitted.', multiple=True) 79 | @click.option('--kms-key-arn', default=[], help='Amazon Resource Name of the KMS key used to encrypt the shared objects. 
Multiple values permitted.', multiple=True) 80 | def main(data_set_name, bucket, data_set_id, region, prefix, key, kms_key_arn): 81 | if (len(prefix) + len(key)) > 5: 82 | raise click.UsageError('No more than a total of 5 prefixes and keys can be provided.') 83 | 84 | if (len(kms_key_arn)) > 10: 85 | raise click.UsageError('No more than a total of 10 KMS keys can be provided.') 86 | 87 | dataexchange = boto3.client('dataexchange', region_name=region) 88 | 89 | data_set_id_to_use = create_data_set(dataexchange, data_set_name) if data_set_id is None else data_set_id 90 | 91 | create_revision_response = dataexchange.create_revision(DataSetId=data_set_id_to_use) 92 | revision_id = create_revision_response['Id'] 93 | 94 | job_id = add_shares(dataexchange, data_set_id_to_use, revision_id, bucket, prefix, key, kms_key_arn) 95 | wait_for_job_to_complete(dataexchange, job_id) 96 | finalize_revision_response = dataexchange.update_revision(DataSetId=data_set_id_to_use, RevisionId=revision_id, Finalized=True) 97 | 98 | click.echo(f'Data set {data_set_id_to_use} configured with Amazon S3 data access.') 99 | 100 | if __name__ == '__main__': 101 | main() 102 | -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-access/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.26.92 2 | botocore>=1.26.92 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-files/README.md: -------------------------------------------------------------------------------- 1 | # Add Amazon S3 Files Dataset (Python) 2 | 3 | This script automates the process of creating and populating AWS Data Exchange datasets using content from Amazon S3 buckets. It creates a new dataset, generates a revision, and imports specified assets (either entire buckets, prefixes, or individual objects) into the revision. The script provides a simple command-line interface, allowing users to easily specify the source S3 bucket, dataset name, and desired assets, streamlining the process of preparing data for distribution through AWS Data Exchange. 4 | 5 | ### Setup 6 | 7 | Install the requirements: 8 | 9 | ```bash 10 | $ pip3 install -r requirements.txt 11 | ``` 12 | 13 | Set the AWS access key and secret environment variables: 14 | 15 | ``` 16 | $ export AWS_ACCESS_KEY_ID= 17 | $ export AWS_SECRET_ACCESS_KEY= 18 | $ export AWS_SESSION_TOKEN= 19 | ``` 20 | 21 | The user needs the **AWSDataExchangeProviderFullAccess** IAM policy associated with your role/account. Find out more 22 | about IAM policies on AWS Data Exchange [here](https://docs.aws.amazon.com/data-exchange/latest/userguide/auth-access.html). 23 | 24 | The user needs to list and read objects from the specified S3 bucket. 
You'll need the following S3 permissions: 25 | - s3:GetObject 26 | - s3:ListBucket 27 | 28 | Here's a sample IAM policy that includes the necessary permissions: 29 | ``` 30 | { 31 | "Version": "2012-10-17", 32 | "Statement": [ 33 | { 34 | "Effect": "Allow", 35 | "Action": [ 36 | "s3:ListBucket", 37 | "s3:GetObject" 38 | ], 39 | "Resource": [ 40 | "arn:aws:s3:::your-bucket-name", 41 | "arn:aws:s3:::your-bucket-name/*" 42 | ] 43 | } 44 | ] 45 | } 46 | ``` 47 | ### Example Usage 48 | 49 | Get usage help: `python3 add-amazon-s3-files.py --help` 50 | 51 | Create dataset and import all files from an Amazon S3 Bucket: `python3 add-amazon-s3-files.py --dataset-name example_dataset --bucket example_bucket` 52 | 53 | Create dataset and import files from specific prefix(es) within an Amazon S3 Bucket:`python3 add-amazon-s3-files.py --dataset-name example_prefix_dataset --bucket example_bucket --prefix data/` 54 | 55 | Create dataset and import specific file(s) from an Amazon S3 bucket: `python3 add-amazon-s3-files.py --dataset-name example_file_dataset --bucket example_bucket --key data/titanic.parquet` -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-files/add-amazon-s3-files.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import time 3 | import click 4 | from botocore.exceptions import ClientError 5 | 6 | def create_dataset(dataexchange, dataset_name): 7 | """ 8 | Create a data files delivery method dataset 9 | """ 10 | try: 11 | dataset_params = { 12 | 'AssetType': 'S3_SNAPSHOT', 13 | 'Description': f'Dataset for {dataset_name}', 14 | 'Name': dataset_name, 15 | 'Tags': { 16 | 'Source': 'S3' 17 | } 18 | } 19 | 20 | response = dataexchange.create_data_set(**dataset_params) 21 | return response['Id'] 22 | except ClientError as e: 23 | click.echo(f"An error occurred while creating the dataset: {e}", err=True) 24 | raise 25 | 26 | def create_revision(dataexchange, dataset_id): 27 | """ 28 | Create a new revision for the dataset 29 | """ 30 | try: 31 | response = dataexchange.create_revision(DataSetId=dataset_id) 32 | return response['Id'] 33 | except ClientError as e: 34 | click.echo(f"An error occurred while creating the revision: {e}", err=True) 35 | raise 36 | 37 | def add_asset_to_revision(dataexchange, dataset_id, revision_id, bucket, prefixes, keys): 38 | """ 39 | Add assets to the revision based on prefixes and keys, or entire bucket if none specified 40 | """ 41 | try: 42 | s3 = boto3.client('s3') 43 | asset_sources = [] 44 | 45 | if not prefixes and not keys: 46 | # If no prefixes or keys specified, include entire bucket 47 | response = s3.list_objects_v2(Bucket=bucket) 48 | for obj in response.get('Contents', []): 49 | asset_sources.append({ 50 | 'Bucket': bucket, 51 | 'Key': obj['Key'] 52 | }) 53 | else: 54 | # Process prefixes 55 | for prefix in prefixes: 56 | if prefix: 57 | response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) 58 | for obj in response.get('Contents', []): 59 | asset_sources.append({ 60 | 'Bucket': bucket, 61 | 'Key': obj['Key'] 62 | }) 63 | 64 | # Add individual keys 65 | for key in keys: 66 | asset_sources.append({ 67 | 'Bucket': bucket, 68 | 'Key': key 69 | }) 70 | 71 | # Print assets being added 72 | for asset in asset_sources: 73 | print(f"Adding asset: s3://{asset['Bucket']}/{asset['Key']}") 74 | 75 | # Create a single job for all assets 76 | job_details = { 77 | 'ImportAssetsFromS3': { 78 | 'AssetSources': asset_sources, 79 | 
'DataSetId': dataset_id, 80 | 'RevisionId': revision_id 81 | } 82 | } 83 | 84 | response = dataexchange.create_job(Type='IMPORT_ASSETS_FROM_S3', Details=job_details) 85 | job_id = response['Id'] 86 | 87 | # Wait for job completion 88 | job_state = wait_for_job_completion(dataexchange, job_id) 89 | if job_state != 'COMPLETED': 90 | raise click.ClickException(f"Job failed with state: {job_state}") 91 | 92 | return "All assets added successfully" 93 | except ClientError as e: 94 | click.echo(f"An error occurred while adding the assets: {e}", err=True) 95 | raise 96 | 97 | def wait_for_job_completion(dataexchange, job_id): 98 | """ 99 | Wait for a job to complete 100 | """ 101 | dataexchange.start_job(JobId=job_id) 102 | 103 | while True: 104 | job_response = dataexchange.get_job(JobId=job_id) 105 | job_state = job_response['State'] 106 | 107 | if job_state in ['COMPLETED', 'ERROR', 'CANCELLED']: 108 | if job_state == 'ERROR': 109 | error_details = job_response.get('Errors', []) 110 | click.echo(f"Job failed with errors: {error_details}") 111 | return job_state 112 | 113 | click.echo(f"Job {job_id} is in {job_state} state. Waiting...") 114 | time.sleep(5) 115 | 116 | def finalize_revision(dataexchange, dataset_id, revision_id): 117 | """ 118 | Finalize the revision 119 | """ 120 | try: 121 | dataexchange.update_revision(DataSetId=dataset_id, RevisionId=revision_id, Finalized=True) 122 | except ClientError as e: 123 | click.echo(f"An error occurred while finalizing the revision: {e}", err=True) 124 | raise 125 | 126 | @click.command() 127 | @click.option('--bucket', required=True, help='S3 bucket name or full S3 URI') 128 | @click.option('--prefix', multiple=True, help='S3 prefix to include (can be used multiple times)') 129 | @click.option('--key', multiple=True, help='S3 object key to include (can be used multiple times)') 130 | @click.option('--dataset-name', required=True, help='Name of the dataset') 131 | @click.option('--region', default='us-east-1', help='AWS region') 132 | def main(bucket, prefix, key, dataset_name, region): 133 | """ 134 | Create a dataset and import assets from S3. 135 | """ 136 | # Parse bucket and initial prefix from the provided bucket parameter 137 | if bucket.startswith('s3://'): 138 | parts = bucket.replace('s3://', '').split('/', 1) 139 | bucket_name = parts[0] 140 | initial_prefix = parts[1] if len(parts) > 1 else '' 141 | else: 142 | bucket_name = bucket 143 | initial_prefix = '' 144 | 145 | # Combine initial_prefix with provided prefixes 146 | all_prefixes = [initial_prefix] if initial_prefix else [] 147 | all_prefixes.extend(prefix) 148 | 149 | dataexchange = boto3.client('dataexchange', region_name=region) 150 | 151 | try: 152 | # Create dataset 153 | dataset_id = create_dataset(dataexchange, dataset_name) 154 | click.echo(f"Dataset created successfully. Dataset ID: {dataset_id}") 155 | 156 | # Create revision 157 | revision_id = create_revision(dataexchange, dataset_id) 158 | click.echo(f"Revision created successfully. 
Revision ID: {revision_id}") 159 | 160 | # Add assets to revision 161 | result = add_asset_to_revision(dataexchange, dataset_id, revision_id, bucket_name, all_prefixes, key) 162 | click.echo(result) 163 | 164 | # Finalize revision 165 | finalize_revision(dataexchange, dataset_id, revision_id) 166 | click.echo("Revision finalized successfully.") 167 | 168 | except Exception as e: 169 | click.echo(f"An error occurred: {str(e)}", err=True) 170 | raise click.Abort() 171 | 172 | if __name__ == "__main__": 173 | main() -------------------------------------------------------------------------------- /providers/python/add-amazon-s3-data-files/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>1.35 2 | botocore>=1.35 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /providers/python/create-amazon-s3-files-data-grant/README.md: -------------------------------------------------------------------------------- 1 | # Create file-based data grant on AWS Data Exchange (Python) 2 | 3 | This script automates the process of creating and populating an AWS Data Exchange dataset with files from an S3 bucket. It creates a new dataset, adds specified S3 objects as assets to a new revision within that dataset, and finalizes the revision. Finally, it creates a data grant for the dataset, allowing a specified AWS account to access the data, optionally with an expiration date. 4 | 5 | ### Setup 6 | 7 | Install the requirements: 8 | 9 | ```bash 10 | $ pip3 install -r requirements.txt 11 | ``` 12 | 13 | Set the AWS access key and secret environment variables: 14 | 15 | ``` 16 | $ export AWS_ACCESS_KEY_ID= 17 | $ export AWS_SECRET_ACCESS_KEY= 18 | $ export AWS_SESSION_TOKEN= 19 | ``` 20 | 21 | The user needs the **AWSDataExchangeProviderFullAccess** IAM policy associated with your role/account. Find out more 22 | about IAM policies on AWS Data Exchange [here](https://docs.aws.amazon.com/data-exchange/latest/userguide/auth-access.html). 23 | 24 | The user needs to list and read objects from the specified S3 bucket. You'll need the following S3 permissions: 25 | - s3:GetObject 26 | - s3:ListBucket 27 | 28 | Here's a sample IAM policy that includes the necessary permissions: 29 | ``` 30 | { 31 | "Version": "2012-10-17", 32 | "Statement": [ 33 | { 34 | "Effect": "Allow", 35 | "Action": [ 36 | "s3:ListBucket", 37 | "s3:GetObject" 38 | ], 39 | "Resource": [ 40 | "arn:aws:s3:::your-bucket-name", 41 | "arn:aws:s3:::your-bucket-name/*" 42 | ] 43 | } 44 | ] 45 | } 46 | ``` 47 | ### Example Usage 48 | 49 | Get usage help: `python3 create-data-grant.py --help` 50 | 51 | Create data grant for Amazon S3 Bucket: `python3 create-data-grant.py --dataset-name example_dataset --bucket s3://example-bucket --grant-name example_bucket_grant --grant-end-date 2024-12-01 --target-account-id 123456789012` 52 | 53 | Create data grant for prefix(s) within Amazon S3 Bucket: `python3 create-data-grant.py --dataset-name example_prefix_dataset --bucket example-bucket --prefix data/ --grant-name example_prefix_grant --grant-end-date 2024-12-01 --target-account-id 123456789012` 54 | 55 | Create data grant for specific key(s) within an Amazon S3 bucket: `python3 create-data-grant.py --dataset-name example_file_dataset --bucket example-bucket --key data/example.parquet --grant-name example_file_grant --grant-end-date 2024-12-01 --target-account-id 123456789012` 56 | 57 | **Note**: `grant-end-date` is an optional parameter. 
If not provided, the grant will not expire. -------------------------------------------------------------------------------- /providers/python/create-amazon-s3-files-data-grant/create-data-grant.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import time 3 | import click 4 | from botocore.exceptions import ClientError 5 | from datetime import datetime, timezone 6 | 7 | def create_dataset(dataexchange, dataset_name): 8 | """ 9 | Create a data files delivery method dataset 10 | """ 11 | try: 12 | dataset_params = { 13 | 'AssetType': 'S3_SNAPSHOT', 14 | 'Description': f'Dataset for {dataset_name}', 15 | 'Name': dataset_name, 16 | 'Tags': { 17 | 'Source': 'S3' 18 | } 19 | } 20 | 21 | response = dataexchange.create_data_set(**dataset_params) 22 | return response['Id'] 23 | except ClientError as e: 24 | click.echo(f"An error occurred while creating the dataset: {e}", err=True) 25 | raise 26 | 27 | def create_data_grant(dataexchange, dataset_id, grant_name, grant_end_date, target_account_id, grant_description=None): 28 | """ 29 | Create a data grant for the specified dataset 30 | """ 31 | try: 32 | grant_params = { 33 | 'Name': grant_name, 34 | 'GrantDistributionScope': 'NONE', # or 'AWS_ORGANIZATION' if applicable 35 | 'ReceiverPrincipal': target_account_id, 36 | 'SourceDataSetId': dataset_id, 37 | 'Description': grant_description or f'Data grant for {grant_name}', 38 | 'Tags': { 39 | 'CreatedBy': 'AutomationScript' 40 | } 41 | } 42 | if grant_end_date: 43 | grant_params['EndsAt'] = grant_end_date 44 | response = dataexchange.create_data_grant(**grant_params) 45 | return response['Id'] 46 | except ClientError as e: 47 | click.echo(f"An error occurred while creating the data grant: {e}", err=True) 48 | raise 49 | 50 | def create_revision(dataexchange, dataset_id): 51 | """ 52 | Create a new revision for the dataset 53 | """ 54 | try: 55 | response = dataexchange.create_revision(DataSetId=dataset_id) 56 | return response['Id'] 57 | except ClientError as e: 58 | click.echo(f"An error occurred while creating the revision: {e}", err=True) 59 | raise 60 | 61 | def add_asset_to_revision(dataexchange, dataset_id, revision_id, bucket, prefixes, keys): 62 | """ 63 | Add assets to the revision based on prefixes and keys, or entire bucket if none specified 64 | """ 65 | try: 66 | s3 = boto3.client('s3') 67 | asset_sources = [] 68 | 69 | if not prefixes and not keys: 70 | # If no prefixes or keys specified, include entire bucket 71 | response = s3.list_objects_v2(Bucket=bucket) 72 | for obj in response.get('Contents', []): 73 | asset_sources.append({ 74 | 'Bucket': bucket, 75 | 'Key': obj['Key'] 76 | }) 77 | else: 78 | # Process prefixes 79 | for prefix in prefixes: 80 | if prefix: 81 | response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) 82 | for obj in response.get('Contents', []): 83 | asset_sources.append({ 84 | 'Bucket': bucket, 85 | 'Key': obj['Key'] 86 | }) 87 | 88 | # Add individual keys 89 | for key in keys: 90 | asset_sources.append({ 91 | 'Bucket': bucket, 92 | 'Key': key 93 | }) 94 | 95 | # Print assets being added 96 | for asset in asset_sources: 97 | print(f"Adding asset: s3://{asset['Bucket']}/{asset['Key']}") 98 | 99 | # Create a single job for all assets 100 | job_details = { 101 | 'ImportAssetsFromS3': { 102 | 'AssetSources': asset_sources, 103 | 'DataSetId': dataset_id, 104 | 'RevisionId': revision_id 105 | } 106 | } 107 | 108 | response = dataexchange.create_job(Type='IMPORT_ASSETS_FROM_S3', Details=job_details) 109 | job_id 
= response['Id'] 110 | 111 | # Wait for job completion 112 | job_state = wait_for_job_completion(dataexchange, job_id) 113 | if job_state != 'COMPLETED': 114 | raise click.ClickException(f"Job failed with state: {job_state}") 115 | 116 | return "All assets added successfully" 117 | except ClientError as e: 118 | click.echo(f"An error occurred while adding the assets: {e}", err=True) 119 | raise 120 | 121 | def wait_for_job_completion(dataexchange, job_id): 122 | """ 123 | Wait for a job to complete 124 | """ 125 | dataexchange.start_job(JobId=job_id) 126 | 127 | while True: 128 | job_response = dataexchange.get_job(JobId=job_id) 129 | job_state = job_response['State'] 130 | 131 | if job_state in ['COMPLETED', 'ERROR', 'CANCELLED']: 132 | if job_state == 'ERROR': 133 | error_details = job_response.get('Errors', []) 134 | click.echo(f"Job failed with errors: {error_details}") 135 | return job_state 136 | 137 | click.echo(f"Job {job_id} is in {job_state} state. Waiting...") 138 | time.sleep(5) 139 | 140 | def finalize_revision(dataexchange, dataset_id, revision_id): 141 | """ 142 | Finalize the revision 143 | """ 144 | try: 145 | dataexchange.update_revision(DataSetId=dataset_id, RevisionId=revision_id, Finalized=True) 146 | except ClientError as e: 147 | click.echo(f"An error occurred while finalizing the revision: {e}", err=True) 148 | raise 149 | 150 | @click.command() 151 | @click.option('--bucket', required=True, help='S3 bucket name or full S3 URI') 152 | @click.option('--prefix', multiple=True, help='S3 prefix to include (can be used multiple times)') 153 | @click.option('--key', multiple=True, help='S3 object key to include (can be used multiple times)') 154 | @click.option('--dataset-name', required=True, help='Name of the dataset') 155 | @click.option('--grant-name', required=True, help='Name of the data grant') 156 | @click.option('--grant-end-date', type=click.DateTime(formats=["%Y-%m-%d"]), 157 | help='End date for the grant (YYYY-MM-DD). If not provided, grant will not expire.') 158 | @click.option('--target-account-id', required=True, help='Target AWS account ID') 159 | @click.option('--region', default='us-east-1', help='AWS region') 160 | @click.option('--grant-description', help='Custom description for the data grant') 161 | def main(bucket, prefix, key, dataset_name, grant_name, grant_end_date, target_account_id, region, grant_description): 162 | """ 163 | Create a dataset and data grant based on the provided parameters. 164 | """ 165 | # Parse bucket and initial prefix from the provided bucket parameter 166 | if bucket.startswith('s3://'): 167 | parts = bucket.replace('s3://', '').split('/', 1) 168 | bucket_name = parts[0] 169 | initial_prefix = parts[1] if len(parts) > 1 else '' 170 | else: 171 | bucket_name = bucket 172 | initial_prefix = '' 173 | 174 | # Combine initial_prefix with provided prefixes 175 | all_prefixes = [initial_prefix] if initial_prefix else [] 176 | all_prefixes.extend(prefix) 177 | 178 | dataexchange = boto3.client('dataexchange', region_name=region) 179 | 180 | try: 181 | # Create dataset 182 | dataset_id = create_dataset(dataexchange, dataset_name) 183 | click.echo(f"Dataset created successfully. Dataset ID: {dataset_id}") 184 | 185 | # Create revision 186 | revision_id = create_revision(dataexchange, dataset_id) 187 | click.echo(f"Revision created successfully. 
Revision ID: {revision_id}") 188 | 189 | # Add assets to revision 190 | result = add_asset_to_revision(dataexchange, dataset_id, revision_id, bucket_name, all_prefixes, key) 191 | click.echo(result) 192 | 193 | # Finalize revision 194 | finalize_revision(dataexchange, dataset_id, revision_id) 195 | click.echo("Revision finalized successfully.") 196 | 197 | # Format the grant_end_date if provided 198 | formatted_end_date = grant_end_date.replace(tzinfo=timezone.utc) if grant_end_date else None 199 | 200 | # Create data grant 201 | grant_id = create_data_grant(dataexchange, dataset_id, grant_name, formatted_end_date, target_account_id, grant_description) 202 | click.echo(f"Data grant created successfully. Grant ID: {grant_id}") 203 | 204 | except Exception as e: 205 | click.echo(f"An error occurred: {str(e)}", err=True) 206 | raise click.Abort() 207 | 208 | if __name__ == "__main__": 209 | main() -------------------------------------------------------------------------------- /providers/python/create-amazon-s3-files-data-grant/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>1.35 2 | botocore>=1.35 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /providers/python/revoke-revisions-from-published-dataset/README.md: -------------------------------------------------------------------------------- 1 | # Revoke a Revision and Delete All Underlying Assets (Python) 2 | 3 | This example will show how to revoke a revision and delete all the underlying assets from a dataset that is part of a 4 | published Data Product. This can be used to reduce the size of data stored in AWS Data Exchange managed S3 storage when 5 | the data files delivery method is used. 6 | 7 | ### Setup 8 | 9 | Install the requirements: 10 | 11 | ```bash 12 | $ pip3 install -r requirements.txt 13 | ``` 14 | 15 | Set the AWS access key and secret environment variables: 16 | 17 | ``` 18 | $ export AWS_ACCESS_KEY_ID= 19 | $ export AWS_SECRET_ACCESS_KEY= 20 | $ export AWS_SESSION_TOKEN= 21 | ``` 22 | 23 | The user needs the **AWSDataExchangeProviderFullAccess** IAM policy associated with your role/account. Find out more 24 | about IAM policies on AWS Data Exchange [here](https://docs.aws.amazon.com/data-exchange/latest/userguide/auth-access.html).
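The script needs both a data set ID and a revision ID. If you do not have the revision ID at hand, you can list a data set's revisions first; a minimal sketch with boto3 (the data set ID is the same placeholder used in the example usage below):

```python
import boto3

# '643b9fb3df63ce7bae948a1662fa9888' is a placeholder data set ID.
dataexchange = boto3.client('dataexchange', region_name='us-east-1')
paginator = dataexchange.get_paginator('list_data_set_revisions')
for page in paginator.paginate(DataSetId='643b9fb3df63ce7bae948a1662fa9888'):
    for revision in page['Revisions']:
        status = 'revoked' if revision.get('Revoked') else 'active'
        print(revision['Id'], revision['CreatedAt'], status)
```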
25 | 26 | 27 | ### Example Usage 28 | 29 | Get usage help: `python3 revision_pruning.py --help` 30 | 31 | Revoke a revision and delete underlying assets: `python3 revision_pruning.py --data-set-id '643b9fb3df63ce7bae948a1662fa9888' --revision-id 'c27cb47d1cc98a96f0c317fe268c634a' --region us-east-1` 32 | -------------------------------------------------------------------------------- /providers/python/revoke-revisions-from-published-dataset/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.26.92 2 | botocore>=1.26.92 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /providers/python/revoke-revisions-from-published-dataset/revision_pruning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import datetime 3 | from pprint import pprint 4 | import boto3 5 | import click 6 | 7 | 8 | @click.command() 9 | @click.option('--data-set-id', help='dataset ID which needs to be pruned.') 10 | @click.option('--revision-id', help='revision ID which needs to be revoked and all underlying assets deleted.') 11 | @click.option('--region', default='us-east-1', help='AWS Region of the dataset.') 12 | def main(data_set_id, revision_id, region): 13 | client = boto3.client('dataexchange', region_name=region) 14 | assets = client.list_revision_assets( 15 | DataSetId=data_set_id, 16 | RevisionId=revision_id 17 | ) 18 | asset_ids = [d['Id'] for d in assets['Assets']] 19 | asset_names = [d['Name'] for d in assets['Assets']] 20 | print("Name of Assets that will be deleted once the revision is revoked: ") 21 | pprint(asset_names) 22 | # revoke the revision 23 | revoke_response = client.revoke_revision( 24 | DataSetId=data_set_id, 25 | RevisionId=revision_id, 26 | RevocationComment='revoking on ' + str(datetime.datetime.now()) 27 | ) 28 | pprint('revoke status of revision ' + revoke_response['Id'] + ' is ' + str(revoke_response['Revoked'])) 29 | # delete all underlying assets 30 | for asset_id in asset_ids: 31 | client.delete_asset( 32 | AssetId=asset_id, 33 | DataSetId=data_set_id, 34 | RevisionId=revision_id 35 | ) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /providers/ruby/add-data-set/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | gem 'aws-sdk-marketplacecatalog' 7 | -------------------------------------------------------------------------------- /providers/ruby/add-data-set/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.1.0) 5 | aws-partitions (1.373.0) 6 | aws-sdk-core (3.107.0) 7 | aws-eventstream (~> 1, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.9.0) 12 | aws-sdk-core (~> 3, >= 3.99.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sdk-marketplacecatalog (1.7.0) 15 | aws-sdk-core (~> 3, >= 3.99.0) 16 | aws-sigv4 (~> 1.1) 17 | aws-sigv4 (1.2.2) 18 | aws-eventstream (~> 1, >= 1.0.2) 19 | jmespath (1.6.1) 20 | 21 | PLATFORMS 22 | ruby 23 | 24 | DEPENDENCIES 25 | aws-sdk-dataexchange 26 | aws-sdk-marketplacecatalog 27 | 28 | BUNDLED WITH 29 | 2.1.4 30 | 
-------------------------------------------------------------------------------- /providers/ruby/add-data-set/README.md: -------------------------------------------------------------------------------- 1 | # Add a New Data Set to a Published Product 2 | 3 | This sample creates and publishes a new data set by combining the [AWS Marketplace Catalog API](https://docs.aws.amazon.com/marketplace-catalog/latest/api-reference/welcome.html)'s [AddDataSets](https://docs.aws.amazon.com/es_es/data-exchange/latest/userguide/add-data-sets.html) change set, and the [AWS Data Exchange API](https://docs.aws.amazon.com/data-exchange/latest/apireference/welcome.html). 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, `AWS_REGION` and `ENTITY_ID` that identifies the product in which to publish a data set. You can enumerate [owned data products](https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/owned/products) using the [enumerate-data-products sample](../enumerate-data-products) to get this ID. 6 | 7 | ``` 8 | $ ENTITY_ID=prod-... bundle exec ruby add-data-set.rb 9 | 10 | Created a new Data Set 97267ce8224e3cae6286075d703f9e7f called "aws-dataexchange-api-samples test". 11 | Adding Data Set arn:aws:dataexchange:us-east-1:147854383891:data-sets/97267ce8224e3cae6286075d703f9e7f to "prod-jrcarqhoeypfs@11". 12 | Started change set a0z6l4wsl7jn5azcsjzy86zcl ................ done. 13 | Change set a0z6l4wsl7jn5azcsjzy86zcl published. 14 | Done. 15 | ``` -------------------------------------------------------------------------------- /providers/ruby/add-data-set/add-data-set.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-dataexchange' 4 | require 'aws-sdk-marketplacecatalog' 5 | 6 | Aws.config.update( 7 | region: ENV['AWS_REGION'] || 'us-east-1', 8 | credentials: Aws::Credentials.new( 9 | ENV['AWS_ACCESS_KEY_ID'], 10 | ENV['AWS_SECRET_ACCESS_KEY'], 11 | ENV['AWS_SESSION_TOKEN'] 12 | ) 13 | ) 14 | 15 | dx = Aws::DataExchange::Client.new 16 | 17 | # create a Data Set 18 | 19 | data_set = dx.create_data_set( 20 | asset_type: 'S3_SNAPSHOT', 21 | name: 'aws-dataexchange-api-samples test', 22 | description: 'Test Data Set for aws-dataexchange-api-samples.', 23 | tags: { 24 | category: 'demo', 25 | language: 'ruby' 26 | } 27 | ) 28 | 29 | puts "Created a new Data Set #{data_set.id} called \"#{data_set.name}\"." 30 | 31 | catalog_name = 'AWSMarketplace' 32 | entity_id = ENV['ENTITY_ID'] || raise("missing ENV['ENTITY_ID']") 33 | 34 | catalog = Aws::MarketplaceCatalog::Client.new 35 | 36 | # describe the product entity in the AWS Marketplace catalog 37 | 38 | described_entity = catalog.describe_entity(catalog: catalog_name, entity_id: entity_id) 39 | described_entity_details = JSON.parse(described_entity.details) 40 | 41 | # add the new data set to the product 42 | puts "Adding Data Set #{data_set.arn} to \"#{described_entity.entity_identifier}\"." 43 | 44 | start_change_set = catalog.start_change_set( 45 | catalog: 'AWSMarketplace', 46 | change_set_name: "Publishing data set to #{entity_id}.", 47 | change_set: [ 48 | { 49 | change_type: 'AddDataSets', 50 | entity: { 51 | identifier: described_entity.entity_identifier, 52 | type: described_entity.entity_type 53 | }, 54 | details: JSON.dump( 55 | 'DataSets' => [ 56 | { 'Arn' => data_set.arn } 57 | ] 58 | ) 59 | } 60 | ] 61 | ) 62 | 63 | STDOUT.write "Started change set #{start_change_set.change_set_id} ..." 
64 | 65 | chage_set_id = start_change_set.change_set_id 66 | loop do 67 | sleep 1 68 | 69 | describe_change_set = catalog.describe_change_set( 70 | catalog: 'AWSMarketplace', 71 | change_set_id: chage_set_id 72 | ) 73 | 74 | describe_change_set_status = describe_change_set.status 75 | break if describe_change_set_status == 'SUCCEEDED' 76 | 77 | if describe_change_set_status == 'FAILED' 78 | raise "#{describe_change_set.failure_description}\n#{describe_change_set 79 | .change_set.first.error_detail_list 80 | .map(&:error_message).join}" 81 | end 82 | 83 | STDOUT.write('.') 84 | end 85 | puts ' done.' 86 | 87 | puts "Change set #{chage_set_id} published." 88 | puts 'Done.' 89 | -------------------------------------------------------------------------------- /providers/ruby/add-revision-to-a-data-set/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | gem 'aws-sdk-marketplacecatalog' 7 | gem 'time_ago_in_words' 8 | -------------------------------------------------------------------------------- /providers/ruby/add-revision-to-a-data-set/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.0.3) 5 | aws-partitions (1.269.0) 6 | aws-sdk-core (3.89.1) 7 | aws-eventstream (~> 1.0, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.0.0) 12 | aws-sdk-core (~> 3, >= 3.71.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sdk-marketplacecatalog (1.0.0) 15 | aws-sdk-core (~> 3, >= 3.71.0) 16 | aws-sigv4 (~> 1.1) 17 | aws-sigv4 (1.1.0) 18 | aws-eventstream (~> 1.0, >= 1.0.2) 19 | jmespath (1.6.1) 20 | time_ago_in_words (0.1.1) 21 | 22 | PLATFORMS 23 | ruby 24 | 25 | DEPENDENCIES 26 | aws-sdk-dataexchange 27 | aws-sdk-marketplacecatalog 28 | time_ago_in_words 29 | 30 | BUNDLED WITH 31 | 2.1.4 32 | -------------------------------------------------------------------------------- /providers/ruby/add-revision-to-a-data-set/README.md: -------------------------------------------------------------------------------- 1 | # Add a New Revision to a Data Set 2 | 3 | This sample adds a new finalized revision to a data set using data in S3 by combining the [AWS Marketplace Catalog API](https://docs.aws.amazon.com/marketplace-catalog/latest/api-reference/welcome.html)'s [AddRevisions](https://docs.aws.amazon.com/es_es/data-exchange/latest/userguide/add-revisions.html) change set, and the [AWS Data Exchange API](https://docs.aws.amazon.com/data-exchange/latest/apireference/welcome.html). 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, `AWS_REGION` and `ENTITY_ID` that identifies the product that contains a data set to add a revision to. You can enumerate [owned data products](https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/owned/products) using the [enumerate-data-products sample](../enumerate-data-products) to get this ID. 6 | 7 | ``` 8 | $ ENTITY_ID=prod-... bundle exec ruby add-revision-to-a-data-set.rb 9 | 10 | 6c6becda6a8fb086b945bbf1dca4e1f5: Junto Heartbeat updated 1 minute and 15 seconds ago 11 | Created revision 3e6d60894e5e3bee5c528d0ef5268f71 12 | Importing aws-samples-create-data-set-with-finalized-revision/data.txt from S3 ............. done. 13 | The revision 3e6d60894e5e3bee5c528d0ef5268f71 has been finalized. 
14 | Started change set 2jqqse6runsgfz8uej5e2mpq6 ............ done. 15 | Change set 2jqqse6runsgfz8uej5e2mpq6 published. 16 | Done. 17 | ``` -------------------------------------------------------------------------------- /providers/ruby/add-revision-to-a-data-set/add-revision-to-a-data-set.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-dataexchange' 4 | require 'aws-sdk-marketplacecatalog' 5 | require 'time_ago_in_words' 6 | 7 | Aws.config.update( 8 | region: ENV['AWS_REGION'] || 'us-east-1', 9 | credentials: Aws::Credentials.new( 10 | ENV['AWS_ACCESS_KEY_ID'], 11 | ENV['AWS_SECRET_ACCESS_KEY'], 12 | ENV['AWS_SESSION_TOKEN'] 13 | ) 14 | ) 15 | 16 | catalog_name = 'AWSMarketplace' 17 | entity_id = ENV['ENTITY_ID'] || raise("missing ENV['ENTITY_ID']") 18 | 19 | catalog = Aws::MarketplaceCatalog::Client.new 20 | 21 | # describe a specific entity 22 | 23 | described_entity = catalog.describe_entity(catalog: catalog_name, entity_id: entity_id) 24 | described_entity_details = JSON.parse(described_entity.details) 25 | 26 | # first data set 27 | data_set = described_entity_details['DataSets'].first 28 | raise 'Missing Data Set' unless data_set 29 | 30 | data_set_name = data_set['Name'] 31 | data_set_arn = data_set['DataSetArn'] 32 | data_set_id = Aws::ARNParser.parse(data_set_arn).resource.split('/').last 33 | data_set_last_revision_added_at = DateTime.parse(data_set['LastRevisionAddedDate']) 34 | puts "#{data_set_id}: #{data_set_name} updated #{data_set_last_revision_added_at.to_time.ago_in_words}" 35 | 36 | # create a revision and finalize it 37 | dx = Aws::DataExchange::Client.new 38 | 39 | revision = dx.create_revision( 40 | data_set_id: data_set_id, 41 | comment: 'New revision in the Data Set.' 42 | ) 43 | 44 | puts "Created revision #{revision.id}" 45 | 46 | # import data from S3 47 | 48 | s3_bucket_name = 'aws-samples-create-data-set-with-finalized-revision' 49 | s3_data_key = 'data.txt' 50 | 51 | STDOUT.write "Importing #{s3_bucket_name}/#{s3_data_key} from S3 ..." 52 | 53 | export_job = dx.create_job( 54 | type: 'IMPORT_ASSETS_FROM_S3', 55 | details: { 56 | import_assets_from_s3: { 57 | asset_sources: [ 58 | bucket: s3_bucket_name, 59 | key: s3_data_key 60 | ], 61 | data_set_id: data_set_id, 62 | revision_id: revision.id 63 | } 64 | } 65 | ) 66 | 67 | dx.start_job(job_id: export_job.id) 68 | 69 | loop do 70 | sleep 1 71 | job_in_progress = dx.get_job(job_id: export_job.id) 72 | STDOUT.write('.') 73 | state = job_in_progress.state 74 | next if state == 'IN_PROGRESS' || state == 'WAITING' 75 | break if state == 'COMPLETED' 76 | raise job_in_progress.errors.join(&:to_s) if job_in_progress.state == 'ERROR' 77 | 78 | raise job_in_progress.state 79 | end 80 | 81 | puts ' done.' 82 | 83 | # finalize the revision 84 | 85 | dx.update_revision( 86 | data_set_id: data_set_id, 87 | revision_id: revision.id, 88 | finalized: true 89 | ) 90 | 91 | # get the revision 92 | 93 | finalized_revision = dx.get_revision( 94 | data_set_id: data_set_id, 95 | revision_id: revision.id 96 | ) 97 | 98 | puts "The revision #{revision.id} has #{finalized_revision.finalized ? 'been finalized' : 'not been finalized'}." 
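# Finalizing the revision only marks it as complete and immutable within the
# data set; the AddRevisions change set started below (through the AWS
# Marketplace Catalog API) is what publishes the revision to the product so
# subscribers can see it.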
99 | 100 | # add a finalized revision to the data set 101 | 102 | start_change_set = catalog.start_change_set( 103 | catalog: 'AWSMarketplace', 104 | change_set_name: "Adding revision to #{data_set_name}.", 105 | change_set: [ 106 | { 107 | change_type: 'AddRevisions', 108 | entity: { 109 | identifier: described_entity.entity_identifier, 110 | type: described_entity.entity_type 111 | }, 112 | details: JSON.dump( 113 | 'DataSetArn' => data_set_arn, 114 | 'RevisionArns' => [finalized_revision.arn] 115 | ) 116 | } 117 | ] 118 | ) 119 | 120 | STDOUT.write "Started change set #{start_change_set.change_set_id} ..." 121 | 122 | chage_set_id = start_change_set.change_set_id 123 | loop do 124 | sleep 1 125 | 126 | describe_change_set = catalog.describe_change_set( 127 | catalog: 'AWSMarketplace', 128 | change_set_id: chage_set_id 129 | ) 130 | 131 | describe_change_set_status = describe_change_set.status 132 | break if describe_change_set_status == 'SUCCEEDED' 133 | 134 | if describe_change_set_status == 'FAILED' 135 | raise "#{describe_change_set.failure_description}\n#{describe_change_set 136 | .change_set.first.error_detail_list 137 | .map(&:error_message).join}" 138 | end 139 | 140 | STDOUT.write('.') 141 | end 142 | puts ' done.' 143 | 144 | puts "Change set #{chage_set_id} published." 145 | puts 'Done.' 146 | -------------------------------------------------------------------------------- /providers/ruby/create-data-set-with-finalized-revision/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | -------------------------------------------------------------------------------- /providers/ruby/create-data-set-with-finalized-revision/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.0.3) 5 | aws-partitions (1.239.0) 6 | aws-sdk-core (3.77.0) 7 | aws-eventstream (~> 1.0, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.0.0) 12 | aws-sdk-core (~> 3, >= 3.71.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sigv4 (1.1.0) 15 | aws-eventstream (~> 1.0, >= 1.0.2) 16 | jmespath (1.6.1) 17 | 18 | PLATFORMS 19 | ruby 20 | 21 | DEPENDENCIES 22 | aws-sdk-dataexchange 23 | 24 | BUNDLED WITH 25 | 1.17.3 26 | -------------------------------------------------------------------------------- /providers/ruby/create-data-set-with-finalized-revision/README.md: -------------------------------------------------------------------------------- 1 | # Create a New Data Set with a Finalized Revision 2 | 3 | This sample creates a new Data Set with a finalized revision using data in S3. 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, `AWS_REGION`, `S3_BUCKET_NAME` and `S3_DATA_KEY` used to identify data to add to a revision from S3. 6 | 7 | ``` 8 | $ S3_BUCKET_NAME=... S3_DATA_KEY=... bundle exec ruby create-data-set-with-finalized-revision.rb 9 | 10 | Created a new Data Set 003686f310ac07ae35e8edd63af7e5b5 called "aws-dataexchange-api-samples test". 11 | Created revision 0ad999457bbc726d76acc4ff6361c9bf 12 | Importing bucket/file.txt from S3 .......... done. 13 | The revision 0ad999457bbc726d76acc4ff6361c9bf has been finalized. 
14 | ``` 15 | -------------------------------------------------------------------------------- /providers/ruby/create-data-set-with-finalized-revision/create-data-set-with-finalized-revision.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-dataexchange' 4 | 5 | Aws.config.update( 6 | region: ENV['AWS_REGION'] || 'us-east-1', 7 | credentials: Aws::Credentials.new( 8 | ENV['AWS_ACCESS_KEY_ID'], 9 | ENV['AWS_SECRET_ACCESS_KEY'], 10 | ENV['AWS_SESSION_TOKEN'] 11 | ) 12 | ) 13 | 14 | s3_bucket_name = ENV['S3_BUCKET_NAME'] || raise("missing ENV['S3_BUCKET_NAME']") 15 | s3_data_key = ENV['S3_DATA_KEY'] || raise("missing ENV['S3_DATA_KEY']") 16 | 17 | dx = Aws::DataExchange::Client.new 18 | 19 | # create a Data Set 20 | 21 | data_set = dx.create_data_set( 22 | asset_type: 'S3_SNAPSHOT', 23 | name: 'aws-dataexchange-api-samples test', 24 | description: 'Test Data Set for aws-dataexchange-api-samples.', 25 | tags: { 26 | category: 'demo', 27 | language: 'ruby' 28 | } 29 | ) 30 | 31 | puts "Created a new Data Set #{data_set.id} called \"#{data_set.name}\"." 32 | 33 | # create a revision in the Data Set 34 | 35 | revision = dx.create_revision( 36 | data_set_id: data_set.id, 37 | comment: 'First revision in the Data Set.' 38 | ) 39 | 40 | puts "Created revision #{revision.id}" 41 | 42 | # import data from S3 43 | 44 | STDOUT.write "Importing #{s3_bucket_name}/#{s3_data_key} from S3 ..." 45 | 46 | export_job = dx.create_job( 47 | type: 'IMPORT_ASSETS_FROM_S3', 48 | details: { 49 | import_assets_from_s3: { 50 | asset_sources: [ 51 | bucket: s3_bucket_name, 52 | key: s3_data_key 53 | ], 54 | data_set_id: data_set.id, 55 | revision_id: revision.id 56 | } 57 | } 58 | ) 59 | 60 | dx.start_job(job_id: export_job.id) 61 | 62 | loop do 63 | sleep 1 64 | job_in_progress = dx.get_job(job_id: export_job.id) 65 | STDOUT.write('.') 66 | state = job_in_progress.state 67 | next if state == 'IN_PROGRESS' || state == 'WAITING' 68 | break if state == 'COMPLETED' 69 | raise job_in_progress.errors.join(&:to_s) if job_in_progress.state == 'ERROR' 70 | 71 | raise job_in_progress.state 72 | end 73 | 74 | puts ' done.' 75 | 76 | # finalize the revision 77 | 78 | dx.update_revision( 79 | data_set_id: data_set.id, 80 | revision_id: revision.id, 81 | finalized: true 82 | ) 83 | 84 | # get the revision 85 | 86 | finalized_revision = dx.get_revision( 87 | data_set_id: data_set.id, 88 | revision_id: revision.id 89 | ) 90 | 91 | puts "The revision #{revision.id} has #{finalized_revision.finalized ? 'been finalized' : 'not been finalized'}." 92 | 93 | # cleanup 94 | 95 | puts 'Cleaning up, deleting data set.' 96 | 97 | dx.delete_data_set( 98 | data_set_id: data_set.id 99 | ) 100 | 101 | puts 'Done.' 
102 | -------------------------------------------------------------------------------- /providers/ruby/enumerate-data-products/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | gem 'aws-sdk-marketplacecatalog' 7 | gem 'time_ago_in_words' 8 | -------------------------------------------------------------------------------- /providers/ruby/enumerate-data-products/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.0.3) 5 | aws-partitions (1.269.0) 6 | aws-sdk-core (3.89.1) 7 | aws-eventstream (~> 1.0, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.0.0) 12 | aws-sdk-core (~> 3, >= 3.71.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sdk-marketplacecatalog (1.0.0) 15 | aws-sdk-core (~> 3, >= 3.71.0) 16 | aws-sigv4 (~> 1.1) 17 | aws-sigv4 (1.1.0) 18 | aws-eventstream (~> 1.0, >= 1.0.2) 19 | jmespath (1.6.1) 20 | time_ago_in_words (0.1.1) 21 | 22 | PLATFORMS 23 | ruby 24 | 25 | DEPENDENCIES 26 | aws-sdk-dataexchange 27 | aws-sdk-marketplacecatalog 28 | time_ago_in_words 29 | 30 | BUNDLED WITH 31 | 2.1.4 32 | -------------------------------------------------------------------------------- /providers/ruby/enumerate-data-products/README.md: -------------------------------------------------------------------------------- 1 | # Enumerate Data Products 2 | 3 | This sample uses the [AWS Marketplace Catalog API](https://docs.aws.amazon.com/marketplace-catalog/latest/api-reference/welcome.html) to enumerate data products, examines each product's data sets, parses each data set's Arn and fetches the data set using the [AWS Data Exchange API](https://docs.aws.amazon.com/data-exchange/latest/apireference/welcome.html). 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` and `AWS_REGION`. 6 | 7 | ``` 8 | $ bundle exec ruby enumerate-data-products.rb 9 | 10 | Enumerating available data products ... 11 | 78aa8ff8-3eaf-4996-bcf8-67c7a1910528: AWS Data Exchange Heartbeat (Test product) 12 | Junto Heartbeat updated on 9 minutes and 55 seconds ago 13 | c4750301bac97b6bcf7fa3571a5761f3: Heartbeat 14 | prod-jrcarqhoeypfs: Junto Heartbeat Test 15 | Junto Heartbeat updated on 1 hour and 58 minutes ago 16 | 6c6becda6a8fb086b945bbf1dca4e1f5: Junto Heartbeat Test 17 | prod-pi7x52bjwfa3m: AWS Data Exchange Subscription Verification (Test Product) 18 | Subscription Verification Test updated on 113 days and 2 hours ago 19 | 97baee13810ff94ea80c95325c6f7bce: Subscription Verification Test 20 | Done. 
21 | ``` 22 | -------------------------------------------------------------------------------- /providers/ruby/enumerate-data-products/enumerate-data-products.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-marketplacecatalog' 4 | require 'aws-sdk-dataexchange' 5 | require 'time_ago_in_words' 6 | 7 | Aws.config.update( 8 | region: ENV['AWS_REGION'] || 'us-east-1', 9 | credentials: Aws::Credentials.new( 10 | ENV['AWS_ACCESS_KEY_ID'], 11 | ENV['AWS_SECRET_ACCESS_KEY'], 12 | ENV['AWS_SESSION_TOKEN'] 13 | ) 14 | ) 15 | 16 | catalog_name = 'AWSMarketplace' 17 | 18 | catalog = Aws::MarketplaceCatalog::Client.new 19 | dx = Aws::DataExchange::Client.new 20 | 21 | # data products, what you see in https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/owned/products 22 | 23 | puts 'Enumerating available data products ...' 24 | 25 | entities = catalog.list_entities( 26 | catalog: catalog_name, 27 | entity_type: 'DataProduct' 28 | ).entity_summary_list 29 | 30 | entities.each do |entity| 31 | puts "#{entity.entity_id}: #{entity.name}" 32 | 33 | # entity details 34 | described_entity = catalog.describe_entity(catalog: catalog_name, entity_id: entity.entity_id) 35 | described_entity_details = JSON.parse(described_entity.details) 36 | 37 | described_entity_details['DataSets'].each do |data_set| 38 | puts " #{data_set['Name']} updated on #{Time.parse(data_set['LastRevisionAddedDate']).ago_in_words}" 39 | data_set_arn = Aws::ARNParser.parse(data_set['DataSetArn']) 40 | # TODO: be region-aware, currently dx client region is set globally 41 | data_set_id = data_set_arn.resource.split('/').last 42 | data_set = dx.get_data_set(data_set_id: data_set_id) 43 | puts " #{data_set.id}: #{data_set.name}" 44 | end 45 | end 46 | 47 | puts 'Done.' 48 | -------------------------------------------------------------------------------- /providers/usage-metrics/README.md: -------------------------------------------------------------------------------- 1 | 2 | AWS Data Exchange for Amazon S3 unlocks new use cases for providers to offer data files via AWS Data Exchange. Providers can 3 | easily set up AWS Data Exchange for Amazon S3 on top of their existing S3 buckets to share direct access to an entire S3 bucket or specific prefixes and S3 objects. 4 | AWS Data Exchange automatically manages subscriptions, entitlements, billing, and payment. 5 | 6 | This package sets up the infrastructure via CloudFormation in a Data Providers' own AWS Account 7 | to report and visualize subscriber usage on AWS Data Exchange for Amazon S3 data sets. 8 | 9 | ### Architecture 10 | 11 | This package creates the following infrastructure via CloudFormation 12 | 13 | 1. CloudTrail to log all readonly events on S3 Access points across all regions 14 | 2. S3 Bucket to store the logs 15 | 3. Glue Job that runs daily to process the json logs to parquet files 16 | 4. Glue Database and table to query the data and derive insights 17 | 18 | ![architecture](./img/architecture.png) 19 | 20 | ### Setup 21 | 22 | #### Prerequisites 23 | You should have 24 | * An AWS Data Exchange for Amazon S3 product and data set published. 25 | * Permissions to upload file to an existing S3 bucket and deploy a cloudformation template 26 | 27 | #### Deployment 28 | 29 | 1. Clone the repository to your local system. 30 | 2. Upload the [python script](./source/adx_metrics_processing.py) to an existing S3 bucket in your AWS Account. 
You can refer [aws docs on how to upload files](https://docs.aws.amazon.com/AmazonS3/latest/userguide/upload-objects.html). 31 | 3. Copy the s3 URI of the python script. 32 | 4. Create a CloudFormation Stack by uploading the [CloudFormation Template](./source/adx_metrics_infrastructure.yaml) through the Cloudformation console. The stack requires 4 parameters: 33 | ``` 34 | CloudTrailName : Name of the multiregion CloudTrial 35 | GlueDatabaseName : Name of the Glue Database 36 | GlueScriptLocation : S3 path of the Glue script that you copied in Step 3. 37 | GlueJobScheduledHour : Scheduled hour to run the glue job daily 38 | ``` 39 | The default values are provided for your convenience. The stack takes about 2-4 minutes to be created. 40 | 41 | The stack creates a Glue job runs only once a day. You will be able to see the 42 | table with usage data once Glue job has been successful. 43 | 44 | Note: If your AWS Glue resources are managed by AWS Lake Formation, it's crucial to grant the required data lake permissions for these resources. 45 | Failure to do so may result in Glue jobs encountering "Insufficient Lake Formation permissions," or “Unable to verify existence of default database” leading to job failures. Please review below - 46 | * The glue job execution role needs to have Super permissions on the glue database ‘adx_metrics_curated’ and ‘All tables’ in the ‘adx_metrics_curated’ database. 47 | * Grant DESCRIBE Lake Formation permission to Default glue database to be able to verify the existence of the default database. 48 | 49 | 50 | ### Example Queries 51 | Once the table has been created and the Job has been successful, you can query the data 52 | in [Amazon Athena](https://www.amazonaws.cn/en/athena/). Here are a few Sample Queries: 53 | 54 | * Query to see unique subscriber AWS account IDs that have accessed the AWS Data Exchange for S3 datasets in a given year and month 55 | ``` 56 | SELECT distinct subscriber_account_id FROM "adx_metrics_curated"."adx_for_s3_metrics" 57 | where year='2023'and month ='05' and subscriber_account_id != 'anonymous' 58 | ``` 59 | 60 | * Type and number of events(get, list, head) of each type in a given month and year. 61 | ``` 62 | SELECT eventName, count(1) as eventCount FROM "adx_metrics_curated"."adx_for_s3_metrics" 63 | where year='2023'and month ='05' and subscriber_account_id != 'anonymous' group by eventName order by eventCount desc 64 | ``` -------------------------------------------------------------------------------- /providers/usage-metrics/img/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-dataexchange-api-samples/eb511854eb9743874f3679967dd04c9a2ae5c7bc/providers/usage-metrics/img/architecture.png -------------------------------------------------------------------------------- /providers/usage-metrics/source/adx_metrics_infrastructure.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: "Template for AWS Data Exchange for Amazon S3 Usage metrics infrastructure" 3 | 4 | Parameters: 5 | CloudTrailName: 6 | Description: Name of the Cloud Trial 7 | Type: String 8 | Default: "adx-for-s3-accesspoint-trail" 9 | GlueDatabaseName: 10 | Description: Glue database name 11 | Type: String 12 | Default: "adx_metrics_curated" 13 | GlueScriptLocation: 14 | Description: S3 path of the Glue script. 
Example - s3://BucketName/foldername/adx_metrics_processing.py 15 | Type: String 16 | Default: "" 17 | GlueJobScheduledHour: 18 | Description: Scheduled hour to run the glue job everyday. Allowed values - 1 to 23 19 | Type: String 20 | Default: "1" 21 | 22 | 23 | Resources: 24 | 25 | # Create an AWS Glue database 26 | ADXmetricsdatabase: 27 | Type: AWS::Glue::Database 28 | Properties: 29 | CatalogId: !Ref AWS::AccountId 30 | DatabaseInput: 31 | Name: !Sub "${GlueDatabaseName}" 32 | Description: Database to hold tables for ADX for S3 usage metrics 33 | 34 | LoggingBucket: 35 | Type: AWS::S3::Bucket 36 | Properties: 37 | AccessControl: Private 38 | BucketName: !Sub "adx-for-s3-${AWS::Region}-${AWS::AccountId}-metrics-bucket" 39 | 40 | TrailBucketPolicy: 41 | Type: "AWS::S3::BucketPolicy" 42 | DependsOn: 43 | - LoggingBucket 44 | Properties: 45 | Bucket: !Ref LoggingBucket 46 | PolicyDocument: 47 | Version: "2012-10-17" 48 | Statement: 49 | - Effect: Allow 50 | Principal: 51 | Service: "cloudtrail.amazonaws.com" 52 | Action: "s3:GetBucketAcl" 53 | Resource: !Sub "arn:aws:s3:::${LoggingBucket}" 54 | - Effect: Allow 55 | Principal: 56 | Service: "cloudtrail.amazonaws.com" 57 | Action: "s3:PutObject" 58 | Resource: !Sub "arn:aws:s3:::${LoggingBucket}/AWSLogs/${AWS::AccountId}/*" 59 | Condition: 60 | StringEquals: 61 | "s3:x-amz-acl": "bucket-owner-full-control" 62 | 63 | CloudTrail: 64 | Type: "AWS::CloudTrail::Trail" 65 | DependsOn: 66 | - TrailBucketPolicy 67 | - LoggingBucket 68 | Properties: 69 | IsLogging: true 70 | S3BucketName: !Ref LoggingBucket # the name of the Amazon S3 bucket designated for publishing log files 71 | TrailName: !Ref CloudTrailName 72 | IsMultiRegionTrail: true 73 | IncludeGlobalServiceEvents: true 74 | AdvancedEventSelectors: 75 | - FieldSelectors: 76 | - Field: resources.type 77 | Equals: 78 | - AWS::S3::AccessPoint 79 | - Field: eventCategory 80 | Equals: 81 | - Data 82 | - Field: readOnly 83 | Equals: 84 | - true 85 | 86 | GlueJobRole: 87 | Type: 'AWS::IAM::Role' 88 | Properties: 89 | RoleName: "adx-for-s3-glue-job-role" 90 | AssumeRolePolicyDocument: 91 | Version: '2012-10-17' 92 | Statement: 93 | - Effect: Allow 94 | Principal: 95 | Service: glue.amazonaws.com 96 | Action: 'sts:AssumeRole' 97 | Path: / 98 | ManagedPolicyArns: 99 | - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole 100 | Policies: 101 | - PolicyName: S3ReadAccess 102 | PolicyDocument: 103 | Version: '2012-10-17' 104 | Statement: 105 | - Effect: Allow 106 | Action: 107 | - 's3:GetObject' 108 | - "s3:ListBucket" 109 | Resource: 110 | - !Join 111 | - '' 112 | - - 'arn:aws:s3:::' 113 | - !Select 114 | - 1 115 | - !Split 116 | - 's3://' 117 | - !Ref GlueScriptLocation 118 | - PolicyName: S3ReadWriteAccess 119 | PolicyDocument: 120 | Version: '2012-10-17' 121 | Statement: 122 | - Effect: Allow 123 | Action: 124 | - 's3:GetObject' 125 | - "s3:ListBucket" 126 | - 's3:PutObject' 127 | - 's3:DeleteObject' 128 | Resource: 129 | - !Sub "arn:aws:s3:::${LoggingBucket}" 130 | - !Sub "arn:aws:s3:::${LoggingBucket}/*" 131 | GlueJob: 132 | Type: AWS::Glue::Job 133 | DependsOn: GlueJobRole 134 | Properties: 135 | Name: "adx-for-s3-glue-job" 136 | Description: "Glue Job for ADX for S3 metrics" 137 | MaxRetries: 1 138 | Role: !Ref GlueJobRole 139 | WorkerType: G.1X 140 | ExecutionClass: STANDARD 141 | GlueVersion: "3.0" 142 | NumberOfWorkers: 10 143 | Timeout: 2880 144 | DefaultArguments: 145 | "--OVERRIDE_S3_READ_PATH" : "no" 146 | "--S3_READ_PATH" : !Sub 
"s3://${LoggingBucket}/AWSLogs/${AWS::AccountId}/CloudTrail/*/*/*/*/" 147 | "--enable-glue-datacatalog" : "true" 148 | "--S3_WRITE_PATH" : !Sub "s3://${LoggingBucket}/adx_for_s3_metrics" 149 | "--job-bookmark-option" : "job-bookmark-enable" 150 | "--TempDir" : !Sub "s3://${LoggingBucket}/glue/temporary/" 151 | "--S3_CLOUDTRAIL_BASE_PATH" : !Sub "s3://${LoggingBucket}/AWSLogs/${AWS::AccountId}/CloudTrail/*" 152 | "--enable-metrics" : "true" 153 | "--enable-spark-ui" : "true" 154 | "--encryption-type" : "sse-s3" 155 | "--spark-event-logs-path" : !Sub "s3://${LoggingBucket}/glue/sparkHistoryLogs/" 156 | "--enable-job-insights" : "true" 157 | "--GLUE_DATABASE" : !Sub "${GlueDatabaseName}" 158 | "--GLUE_TABLE_NAME" : "adx_for_s3_metrics" 159 | "--enable-continuous-cloudwatch-log" : "true" 160 | "--job-language" : "python" 161 | "--enable-auto-scaling" : "true" 162 | 163 | Command: 164 | Name: glueetl 165 | PythonVersion: 3 166 | ScriptLocation: !Sub ${GlueScriptLocation} 167 | 168 | GlueJobScheduledJobTrigger: 169 | Type: AWS::Glue::Trigger 170 | DependsOn: GlueJob 171 | Properties: 172 | Type: SCHEDULED 173 | Description: Glue job schedule for ADX for S3 metrics 174 | Schedule: !Sub cron(0 ${GlueJobScheduledHour} * * ? *) 175 | StartOnCreation: true 176 | Actions: 177 | - JobName: "adx-for-s3-glue-job" 178 | Name: "adx-for-s3-job-schedule" -------------------------------------------------------------------------------- /providers/usage-metrics/source/adx_metrics_processing.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from awsglue.transforms import * 4 | from awsglue.utils import getResolvedOptions 5 | from pyspark.context import SparkContext 6 | from awsglue.context import GlueContext 7 | from awsglue.job import Job 8 | from datetime import date 9 | from datetime import timedelta 10 | from pyspark.sql.functions import input_file_name 11 | from awsglue.dynamicframe import DynamicFrame 12 | from pyspark.sql.utils import AnalysisException 13 | import boto3 14 | 15 | ## @params: [JOB_NAME,S3_CLOUDTRAIL_BASE_PATH, S3_WRITE_PATH, GLUE_DATABASE, GLUE_TABLE_NAME, OVERRIDE_S3_READ_PATH, S3_READ_PATH] 16 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 17 | 'S3_CLOUDTRAIL_BASE_PATH', 18 | 'S3_WRITE_PATH', 19 | 'GLUE_DATABASE', 20 | 'GLUE_TABLE_NAME', 21 | 'OVERRIDE_S3_READ_PATH', 22 | 'S3_READ_PATH' 23 | ]) 24 | 25 | print("Job Parameters - Start") 26 | print("JOB_NAME : ", args['JOB_NAME']) 27 | print("S3_CLOUDTRAIL_BASE_PATH : ", args['S3_CLOUDTRAIL_BASE_PATH']) 28 | print("S3_WRITE_PATH : ", args['S3_WRITE_PATH']) 29 | print("GLUE_DATABASE : ", args['GLUE_DATABASE']) 30 | print("GLUE_TABLE_NAME : ", args['GLUE_TABLE_NAME']) 31 | print("OVERRIDE_S3_READ_PATH : ", args['OVERRIDE_S3_READ_PATH']) 32 | #print("S3_READ_PATH : ", args['S3_READ_PATH']) 33 | print("Job Parameters - End") 34 | 35 | 36 | 37 | ##Determine s3_read_path 38 | 39 | if args['OVERRIDE_S3_READ_PATH'].upper() == 'YES': 40 | s3_read_path=args['S3_READ_PATH'] 41 | else: 42 | ##Construct directory for previous day logs: 43 | today = date.today() 44 | previous_day = today - timedelta(days = 1) 45 | print("Process Cloud Trail Logs for : ", previous_day) 46 | 47 | year=previous_day.year 48 | month='{:02d}'.format(previous_day.month) 49 | day='{:02d}'.format(previous_day.day) 50 | s3_read_path=args['S3_CLOUDTRAIL_BASE_PATH'] + '/' + str(year) + '/' + str(month) + '/' + str(day) + '/' 51 | 52 | print("S3 Read Path : ", s3_read_path) 53 | 54 | ##Start Spark Job 55 | 56 | 
sc = SparkContext() 57 | 58 | glueContext = GlueContext(sc) 59 | spark = glueContext.spark_session 60 | job = Job(glueContext) 61 | job.init(args['JOB_NAME'], args) 62 | 63 | print("Spark Job Start") 64 | 65 | #read cloudtrail log files 66 | try: 67 | dataFrame = spark.read\ 68 | .option("multiline", "true")\ 69 | .json(s3_read_path) 70 | except AnalysisException as err: 71 | #print(f"{type(err).__name__} was raised: {err}") 72 | print(err) 73 | #err1="Something Else" 74 | if (str(err).split(':')[0] == "Path does not exist"): 75 | job.commit() 76 | os._exit(0) 77 | else: 78 | raise err 79 | 80 | dataFrame_files=dataFrame.withColumn("filename", input_file_name()) 81 | 82 | #For Debug 83 | #dataFrame_files.show() 84 | #dataFrame_files.printSchema() 85 | 86 | dataFrame_files.createOrReplaceTempView("logs_json") 87 | spark.sql("describe logs_json").show() 88 | 89 | #Transform - Expand Data Types to get needed fields 90 | t2=spark.sql("Select explode(Records), filename from logs_json") 91 | #t2.printSchema() 92 | t2.createOrReplaceTempView("logs_json_2") 93 | 94 | t3=spark.sql("Select col,explode(col.resources)as resources, filename from logs_json_2") 95 | t3.createOrReplaceTempView("logs_json_3") 96 | 97 | #Filter ADX Accounts 98 | t4=spark.sql("""Select * from logs_json_3 99 | where resources.type = 'AWS::S3::AccessPoint' 100 | and 101 | resources.accountId IN 102 | ('540564263739', 103 | '504002150500', 104 | '337040091392', 105 | '366362662752', 106 | '330489627928', 107 | '291973504423', 108 | '291973504423', 109 | '461002523379', 110 | '036905324694', 111 | '675969394711', 112 | '108584782536', 113 | '844053218156')""") 114 | 115 | t4.createOrReplaceTempView("logs_json_filtered") 116 | 117 | #Get required fields 118 | t5=spark.sql("""Select 119 | col.eventID, 120 | col.eventName, 121 | col.eventTime, 122 | col.awsRegion as provider_bucket_region, 123 | col.userIdentity.accountId as subscriber_account_id, 124 | col.sourceIPAddress as subscriber_ip_address, 125 | col.userAgent as subscriber_user_agent, 126 | col.requestParameters.bucketName as provider_bucket_name, 127 | col.requestParameters.`x-amz-request-payer` as x_amz_request_payer, 128 | col.requestParameters.prefix as prefix, 129 | col.additionalEventData.bytesTransferredOut as bytes_accessed, 130 | col.resources.arn as access_point_arn, 131 | col.tlsDetails.tlsVersion as tls_version, 132 | array_join(slice(split(filename,'/'), -2, 1),'') as day, 133 | array_join(slice(split(filename,'/'), -3, 1),'') as month , 134 | array_join(slice(split(filename,'/'), -4, 1),'') as year, 135 | array_join(slice(split(filename,'/'), -5, 1),'') as region 136 | from logs_json_filtered""") 137 | 138 | t5.show(truncate = True) 139 | t5_s3_write=DynamicFrame.fromDF(t5, glueContext, "t5_s3_write") 140 | 141 | print("Write to S3 Start") 142 | 143 | sink_out = glueContext.getSink(connection_type="s3", path=args['S3_WRITE_PATH'], 144 | enableUpdateCatalog=True, updateBehavior="UPDATE_IN_DATABASE", 145 | partitionKeys=["year", "month", "day"]) 146 | sink_out.setFormat("glueparquet") 147 | sink_out.setCatalogInfo(catalogDatabase=args['GLUE_DATABASE'], catalogTableName=args['GLUE_TABLE_NAME']) 148 | sink_out.writeFrame(t5_s3_write) 149 | print("Write to S3 End") 150 | 151 | print("Spark Job End") 152 | 153 | 154 | 155 | job.commit() 156 | 157 | -------------------------------------------------------------------------------- /subscribers/cpp/all-entitled-datasets/.gitignore: -------------------------------------------------------------------------------- 1 | 
build 2 | -------------------------------------------------------------------------------- /subscribers/cpp/all-entitled-datasets/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(all-entitled-datasets) 3 | 4 | option(BUILD_SHARED_LIBS "Build shared libraries" ON) 5 | 6 | find_package(AWSSDK REQUIRED COMPONENTS dataexchange) 7 | add_executable(all-entitled-datasets main.cpp) 8 | 9 | target_compile_features(all-entitled-datasets PUBLIC cxx_std_11) 10 | target_link_libraries(all-entitled-datasets ${AWSSDK_LINK_LIBRARIES}) 11 | -------------------------------------------------------------------------------- /subscribers/cpp/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (C++) 2 | 3 | This sample retrieves a list of all subscriber's entitled data sets. 4 | 5 | To build the sample, install the [AWS SDK for C++](https://github.com/aws/aws-sdk-cpp#building-the-sdk) and use [CMake](https://cmake.org/). 6 | 7 | ``` 8 | cpp/all-entitled-datasets $ mkdir build 9 | cpp/all-entitled-datasets $ cd build 10 | cpp/all-entitled-datasets/build $ cmake .. 11 | cpp/all-entitled-datasets/build $ make 12 | ``` 13 | 14 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` and `AWS_REGION`. 15 | 16 | ``` 17 | $ ./all-entitled-datasets 18 | 19 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 20 | Over 80,000 property sales in New York City in 2014 21 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 22 | Over 80,000 property sales in New York City in 2015 23 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 24 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 25 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 26 | Over 80,000 property sales in New York City in 2016 27 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 28 | Records of over 80,000 property sales transactions. 
29 | ```
30 | 
--------------------------------------------------------------------------------
/subscribers/cpp/all-entitled-datasets/main.cpp:
--------------------------------------------------------------------------------
 1 | #include <aws/core/Aws.h>
 2 | #include <aws/dataexchange/DataExchangeClient.h>
 3 | #include <aws/dataexchange/model/ListDataSetsRequest.h>
 4 | #include <aws/dataexchange/model/ListDataSetsResult.h>
 5 | #include <iostream>
 6 | 
 7 | int main(int argc, char** argv)
 8 | {
 9 |     Aws::SDKOptions options;
10 |     Aws::InitAPI(options);
11 | 
12 |     {
13 |         Aws::DataExchange::DataExchangeClient client;
14 | 
15 |         Aws::DataExchange::Model::ListDataSetsRequest list_data_sets_options;
16 |         list_data_sets_options.SetOrigin("ENTITLED");
17 | 
18 |         auto outcome = client.ListDataSets(list_data_sets_options);
19 | 
20 |         if (outcome.IsSuccess()) {
21 |             Aws::Vector<Aws::DataExchange::Model::DataSetEntry> data_sets_list = outcome.GetResult().GetDataSets();
22 | 
23 |             for (auto const &data_set: data_sets_list) {
24 |                 std::cout
25 |                     << data_set.GetOriginDetails().GetProductId() << "/"
26 |                     << data_set.GetId() << ": "
27 |                     << data_set.GetName() << std::endl
28 |                     << " " << data_set.GetDescription()
29 |                     << std::endl;
30 |             }
31 |         } else {
32 |             std::cerr << "ListDataSets error: "
33 |                 << outcome.GetError().GetExceptionName() << " - "
34 |                 << outcome.GetError().GetMessage() << std::endl;
35 |         }
36 |     }
37 | 
38 |     Aws::ShutdownAPI(options);
39 | }
--------------------------------------------------------------------------------
/subscribers/dotnet/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | obj
3 | .vscode
4 | 
5 | 
--------------------------------------------------------------------------------
/subscribers/dotnet/all-entitled-datasets/AwsDataExchangeSample.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 |   <PropertyGroup>
 3 |     <OutputType>Exe</OutputType>
 4 |     <TargetFramework>netcoreapp3.1</TargetFramework>
 5 |   </PropertyGroup>
 6 | 
 7 |   <ItemGroup>
 8 |     <PackageReference Include="AWSSDK.DataExchange" Version="3.3.100.15" />
 9 |   </ItemGroup>
10 | </Project>
--------------------------------------------------------------------------------
/subscribers/dotnet/all-entitled-datasets/Program.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using System.Threading.Tasks;
 3 | using Amazon.DataExchange;
 4 | using Amazon.DataExchange.Model;
 5 | 
 6 | namespace AwsDataExchangeSample
 7 | {
 8 |     class Program
 9 |     {
10 |         static void Main(string[] args)
11 |         {
12 |             AmazonDataExchangeClient client = new AmazonDataExchangeClient();
13 | 
14 |             ListDataSetsRequest listDataSetsRequest = new ListDataSetsRequest();
15 |             listDataSetsRequest.Origin = "ENTITLED";
16 | 
17 |             Task<ListDataSetsResponse> dataSetsRequestTask = client.ListDataSetsAsync(listDataSetsRequest);
18 |             dataSetsRequestTask.Wait();
19 | 
20 |             foreach (DataSetEntry dataSetEntry in dataSetsRequestTask.Result.DataSets)
21 |             {
22 |                 Console.WriteLine("{0}/{1}: {2}\n {3}",
23 |                     dataSetEntry.OriginDetails.ProductId,
24 |                     dataSetEntry.Id,
25 |                     dataSetEntry.Name,
26 |                     dataSetEntry.Description);
27 |             }
28 |         }
29 |     }
30 | }
31 | 
--------------------------------------------------------------------------------
/subscribers/dotnet/all-entitled-datasets/README.md:
--------------------------------------------------------------------------------
1 | # All Entitled Data Sets (DotNet)
2 | 
3 | This sample retrieves a list of all of the subscriber's entitled data sets, in .NET.
4 | 
5 | To run the sample, install .NET and set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
6 | 7 | ``` 8 | $ dotnet run Program.cs 9 | 10 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 11 | Over 80,000 property sales in New York City in 2016 12 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 13 | Over 80,000 property sales in New York City in 2014 14 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 15 | Over 80,000 property sales in New York City in 2015 16 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 17 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 18 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 19 | Records of over 80,000 property sales transactions. 20 | ``` 21 | 22 | ### Implementation Details 23 | 24 | This project was built on a Mac. 25 | 26 | Download and install .NET SDK using the [install script](https://docs.microsoft.com/en-us/dotnet/core/tools/dotnet-install-script). 27 | 28 | ``` 29 | $ wget https://dot.net/v1/dotnet-install.sh 30 | $ chmod 700 dotnet-install.sh 31 | $ ./dotnet-install.sh --install-dir ~/Library/DotNet 32 | 33 | dotnet-install: Downloading link: https://dotnetcli.azureedge.net/dotnet/Sdk/3.1.100/dotnet-sdk-3.1.100-osx-x64.tar.gz 34 | dotnet-install: Extracting zip from https://dotnetcli.azureedge.net/dotnet/Sdk/3.1.100/dotnet-sdk-3.1.100-osx-x64.tar.gz 35 | dotnet-install: Installation finished successfully. 36 | ``` 37 | 38 | Add .NET to `PATH`, edit `~/.bash_profile`. 39 | 40 | ``` 41 | # Add .NET to PATH 42 | export PATH="$PATH:$HOME/Library/DotNet" 43 | ``` 44 | 45 | Create a new console app. 46 | 47 | ``` 48 | $ dotnet new console --name AwsDataExchangeSample 49 | $ cd AwsDataExchangeSample 50 | ``` 51 | 52 | Add the AWS Data Exchange SDK from [Nuget](https://www.nuget.org/packages/AWSSDK.DataExchange/). 53 | 54 | ``` 55 | $ dotnet add package AWSSDK.DataExchange --version 3.3.100.15 56 | ``` 57 | 58 | Modify the code, run the app. 59 | 60 | ``` 61 | $ dotnet run Program.cs 62 | ``` 63 | -------------------------------------------------------------------------------- /subscribers/go/adx-for-apis-simple/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. This simple test product echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content as you explore API based data consumption. By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development Kit (SDK)][Tools] with [Go][AWSDataExchangeSDKForGo] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 4 | 5 | This will typically consist of three main stages: 6 | 1. Import relevant SDK Clients and Interfaces, establish base Client configuration, and instantiate the Client. \ 7 | _(This stage will remain consistent across all potential AWS Data Exchange for APIs use cases)_ 8 | 2. Define the relevant Provider / Product specific identities) \ 9 | _(This stage will remain consistent across all uses of a given Product)_ 10 | 3. Define the request-specific parameters based on your business need. 
\
11 | _(This stage will likely change for every request)_
12 | 
13 | ## Getting Started
14 | Install Go 1.19.x from ["Get Started - The Go Programming Language"][GoGettingStarted].
15 | 
16 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the ["AWS Data Exchange for APIs (Test product)"][APITestProduct] product, and subscribe.
17 | Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the Product Info variables (`assetId`, `revisionId`, `dataSetId`) in the code sample (main.go); you will likely find that they already match the sample contents. Next, update the `sendApiAssetInput` variable based on your desired request parameters. Again, for test purposes, the provided inputs should work just fine. Finally, execute the sample program with `go run main.go`.
18 | 
19 | To assist with finding the necessary inputs for the `sendApiAssetInput` variable, the Data Exchange console provides sample CLI requests as shown below. The first three parameters map to the Product Info variables (`dataSetId`, `revisionId`, and `assetId`), and the rest map to `sendApiAssetInput`:
20 | ```
21 | aws dataexchange send-api-asset \
22 |   --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \
23 |   --revision-id 32559097c7d209b02af6de5cad4385fe \
24 |   --asset-id 4e94198cfdb8400793fb3f0411861960 \
25 |   --method POST \
26 |   --path "/" \
27 |   --query-string-parameters 'param1=value1,param2=value2' \
28 |   --request-headers 'header=header_value' \
29 |   --body "{\"body_param\":\"body_param_value\"}"
30 | ```
31 | 
32 | ## Authentication
33 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. For local development purposes, this will typically use credentials provided to the AWS CLI by [`aws configure`][AWSConfigure]. When running on Amazon EC2 it will typically use the [EC2 Instance Profile][IAMRolesForEC2], and for AWS Lambda it will use the [Lambda Execution Role][LambdaExecutionRole].
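If you need to be explicit about which local credentials and Region are used, the same client can also be built from a named shared-config profile. This is a minimal sketch rather than part of the sample; the profile name `adx-test` is a placeholder, and the snippet reuses the `config` and `dataexchange` imports already shown in main.go:

```
cfg, err := config.LoadDefaultConfig(context.TODO(),
    config.WithRegion("us-east-1"),
    config.WithSharedConfigProfile("adx-test"), // assumed profile name in ~/.aws/config
)
if err != nil {
    log.Fatalf("unable to load SDK config, %v", err)
}
svc := dataexchange.NewFromConfig(cfg) // same client type used in main.go
```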
34 | 35 | [GoGettingStarted]: https://go.dev/learn 36 | [APITestProduct]: https://us-east-1.console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-pgkxrurxwmp76 37 | [Tools]: https://aws.amazon.com/tools/ 38 | [AWSDataExchangeSDKForGo]: https://docs.aws.amazon.com/sdk-for-go/api/service/dataexchange/ 39 | [IAMRolesForEC2]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html 40 | [LambdaExecutionRole]: https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html 41 | [AWSConfigure]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html 42 | -------------------------------------------------------------------------------- /subscribers/go/adx-for-apis-simple/go.mod: -------------------------------------------------------------------------------- 1 | module sandpit/adx 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go-v2 v1.16.7 // indirect 7 | github.com/aws/aws-sdk-go-v2/config v1.15.13 // indirect 8 | github.com/aws/aws-sdk-go-v2/credentials v1.12.8 // indirect 9 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 // indirect 10 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 // indirect 11 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 // indirect 12 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 // indirect 13 | github.com/aws/aws-sdk-go-v2/service/dataexchange v1.15.8 // indirect 14 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8 // indirect 15 | github.com/aws/aws-sdk-go-v2/service/sso v1.11.11 // indirect 16 | github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 // indirect 17 | github.com/aws/smithy-go v1.12.0 // indirect 18 | ) 19 | -------------------------------------------------------------------------------- /subscribers/go/adx-for-apis-simple/go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go-v2 v1.16.7 h1:zfBwXus3u14OszRxGcqCDS4MfMCv10e8SMJ2r8Xm0Ns= 2 | github.com/aws/aws-sdk-go-v2 v1.16.7/go.mod h1:6CpKuLXg2w7If3ABZCl/qZ6rEgwtjZTn4eAf4RcEyuw= 3 | github.com/aws/aws-sdk-go-v2/config v1.15.13 h1:CJH9zn/Enst7lDiGpoguVt0lZr5HcpNVlRJWbJ6qreo= 4 | github.com/aws/aws-sdk-go-v2/config v1.15.13/go.mod h1:AcMu50uhV6wMBUlURnEXhr9b3fX6FLSTlEV89krTEGk= 5 | github.com/aws/aws-sdk-go-v2/credentials v1.12.8 h1:niTa7zc7uyOP2ufri0jPESBt1h9yP3Zc0q+xzih3h8o= 6 | github.com/aws/aws-sdk-go-v2/credentials v1.12.8/go.mod h1:P2Hd4Sy7mXRxPNcQMPBmqszSJoDXexX8XEDaT6lucO0= 7 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 h1:VfBdn2AxwMbFyJN/lF/xuT3SakomJ86PZu3rCxb5K0s= 8 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8/go.mod h1:oL1Q3KuCq1D4NykQnIvtRiBGLUXhcpY5pl6QZB2XEPU= 9 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 h1:2C0pYHcUBmdzPj+EKNC4qj97oK6yjrUhc1KoSodglvk= 10 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14/go.mod h1:kdjrMwHwrC3+FsKhNcCMJ7tUVj/8uSD5CZXeQ4wV6fM= 11 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 h1:2J+jdlBJWEmTyAwC82Ym68xCykIvnSnIN18b8xHGlcc= 12 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8/go.mod h1:ZIV8GYoC6WLBW5KGs+o4rsc65/ozd+eQ0L31XF5VDwk= 13 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 h1:QquxR7NH3ULBsKC+NoTpilzbKKS+5AELfNREInbhvas= 14 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15/go.mod h1:Tkrthp/0sNBShQQsamR7j/zY4p19tVTAs+nnqhH6R3c= 15 | github.com/aws/aws-sdk-go-v2/service/dataexchange v1.15.8 h1:st69NxWrfbGNwSt4OTii9pJjE7fS8Thou7KYYunzwYg= 16 | 
github.com/aws/aws-sdk-go-v2/service/dataexchange v1.15.8/go.mod h1:HUZEjEW/R4ExpSMTEhiIzINafg7TCdiJ9seTRPVUTgg= 17 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8 h1:oKnAXxSF2FUvfgw8uzU/v9OTYorJJZ8eBmWhr9TWVVQ= 18 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8/go.mod h1:rDVhIMAX9N2r8nWxDUlbubvvaFMnfsm+3jAV7q+rpM4= 19 | github.com/aws/aws-sdk-go-v2/service/sso v1.11.11 h1:XOJWXNFXJyapJqQuCIPfftsOf0XZZioM0kK6OPRt9MY= 20 | github.com/aws/aws-sdk-go-v2/service/sso v1.11.11/go.mod h1:MO4qguFjs3wPGcCSpQ7kOFTwRvb+eu+fn+1vKleGHUk= 21 | github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 h1:yOfILxyjmtr2ubRkRJldlHDFBhf5vw4CzhbwWIBmimQ= 22 | github.com/aws/aws-sdk-go-v2/service/sts v1.16.9/go.mod h1:O1IvkYxr+39hRf960Us6j0x1P8pDqhTX+oXM5kQNl/Y= 23 | github.com/aws/smithy-go v1.12.0 h1:gXpeZel/jPoWQ7OEmLIgCUnhkFftqNfwWUwAHSlp1v0= 24 | github.com/aws/smithy-go v1.12.0/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= 25 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 26 | github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 27 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 28 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 29 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 30 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 31 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 32 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 33 | -------------------------------------------------------------------------------- /subscribers/go/adx-for-apis-simple/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Please see the README.md on the GitHub AWS Data Exchange Samples repository for a more detailed overview with links to relevant AWS documentation. 3 | 4 | This code is provided as a sample of how to use the AWS Data Exchange Client Software Development Kit (SDK) to connect to Data Exchange For APIs 5 | based Data Sets. This will typically consist of three main stages: 6 | 1. Import relevant SDK Clients, establish base Client configuration, and instantiate the Client. 7 | (This stage will remain consistent across all potential AWS Data Exchange for APIs use cases) 8 | 2. Define the relevant Provider / Product specific identities) 9 | (This stage will remain consistent across all uses of a given Product. 10 | 3. Define the request-specific parameters based on your business need. 11 | (This stage will likely change for every request) 12 | 13 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the "AWS Data Exchange for APIs (Test product)" 14 | Product, and subscribe. 15 | Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the Product Info variables (assetId, revisionId, dataSetId) below 16 | 17 | Familiarity with go programming language is assumed. For go programming language documentation visit: https://go.dev/doc/tutorial/getting-started 18 | 19 | To assist with finding the necessary inputs for the productInfo and sendApiAssetInput values, the Data Exchange console provides 20 | Sample CLI requests as shown below. 
The first 3 parameters map to the productInfo constant, and the rest map to sendApiAssetInput 21 | aws dataexchange send-api-asset \ 22 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 23 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 24 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 25 | --method POST \ 26 | --path "/" \ 27 | --query-string-parameters 'param1=value1,param2=value2' \ 28 | --request-headers 'header=header_value' \ 29 | --body "{\"body_param\":\"body_param_value\"}" 30 | 31 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. 32 | For local development purposes, this will typically use credentials provided to the AWS CLI by `aws configure` 33 | When running on Amazon EC2 it will typically use the EC2 Instance Profile, and for AWS Lambda it will use the Lambda Execution Role. 34 | 35 | To execute this code: 36 | 37 | go run main.go 38 | */ 39 | 40 | package main 41 | 42 | import ( 43 | "context" 44 | "encoding/json" 45 | "fmt" 46 | "log" 47 | 48 | // Import golang aws sdk config and data exchange client 49 | "github.com/aws/aws-sdk-go-v2/config" 50 | "github.com/aws/aws-sdk-go-v2/service/dataexchange" 51 | ) 52 | 53 | var ( 54 | method = "POST" 55 | path = "/" 56 | // Populate Product Info variables (assetId, revisionId, dataSetid) based on SendApiAssetInput type, providing just the mandatory parameters which will be consistent across requests. The examples below are the AWS Data Exchange for APIs (Test product) in us-east-1 57 | assetId = "4e94198cfdb8400793fb3f0411861960" 58 | revisionId = "32559097c7d209b02af6de5cad4385fe" 59 | dataSetId = "8d494cba5e4720e5f6072e280daf70a8" 60 | ) 61 | 62 | func main() { 63 | // Using the SDK's default configuration, loading additional config 64 | // and credentials values from the environment variables, shared 65 | // credentials, and shared configuration files 66 | cfg, err := config.LoadDefaultConfig(context.TODO(), config.WithRegion("us-east-1")) 67 | if err != nil { 68 | log.Fatalf("unable to load SDK config, %v", err) 69 | } 70 | 71 | // Using the Config value, create the Data Exchange client 72 | svc := dataexchange.NewFromConfig(cfg) 73 | 74 | // request body content 75 | var body = map[string]string{"body_param": "body_param_value"} 76 | jsonBody, err := json.Marshal(body) 77 | 78 | // error out if body conversion into json fails 79 | if err != nil { 80 | log.Fatalf("Unable to marshal body to json.") 81 | } 82 | 83 | // convert request body to a string 84 | jsonBodyString := string(jsonBody) 85 | 86 | // query string parameters 87 | var queryStringParameters = map[string]string{ 88 | "param1": "value1", 89 | "param2": "value2", 90 | } 91 | 92 | // set request content type to be json 93 | requestHeaders := map[string]string{"Content-Type": "application/json"} 94 | 95 | // Populate sendApiAssetInput variable based on SendApiAssetInput struct by merging Product Info variables (assetId, revisionId, dataSetId) with additional request specific parameters 96 | sendApiAssetInput := dataexchange.SendApiAssetInput{ 97 | AssetId: &assetId, 98 | RevisionId: &revisionId, 99 | DataSetId: &dataSetId, 100 | Body: &jsonBodyString, 101 | Method: &method, 102 | Path: &path, 103 | QueryStringParameters: queryStringParameters, 104 | RequestHeaders: requestHeaders, 105 | } 106 | 107 | // make a request to the AWS Data Exchange engpoint for the configured product 108 | sendApiAssetOutput, err := svc.SendApiAsset(context.TODO(), &sendApiAssetInput) 109 | 110 | // Error out 
if request failed 111 | if err != nil { 112 | log.Fatalf("SendApiAsset call failed.", err) 113 | } 114 | 115 | // Display response headers 116 | fmt.Println() 117 | fmt.Println("Response Headers:") 118 | fmt.Println() 119 | for key, value := range sendApiAssetOutput.ResponseHeaders { 120 | fmt.Println(key, " = ", value) 121 | } 122 | 123 | // Display response body 124 | fmt.Println() 125 | fmt.Println("Response Body:") 126 | fmt.Println() 127 | fmt.Println(*sendApiAssetOutput.Body) 128 | 129 | } 130 | -------------------------------------------------------------------------------- /subscribers/go/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (Go) 2 | 3 | This sample retrieves a list of all subscriber's entitled data sets, in Go. 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and optionally `AWS_SESSION_TOKEN` in your ~/.aws/credentials file. 6 | 7 | ``` 8 | $ go build all-entitled-datasets.go 9 | 10 | $ go run all-entitled-datasets.go 11 | 12 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 13 | Over 80,000 property sales in New York City in 2014 14 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 15 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 16 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 17 | Over 80,000 property sales in New York City in 2015 18 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 19 | Over 80,000 property sales in New York City in 2016 20 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 21 | Records of over 80,000 property sales transactions. 22 | ``` 23 | -------------------------------------------------------------------------------- /subscribers/go/all-entitled-datasets/all-entitled-datasets.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/aws/aws-sdk-go/aws" 5 | "github.com/aws/aws-sdk-go/aws/session" 6 | "github.com/aws/aws-sdk-go/service/dataexchange" 7 | "fmt" 8 | ) 9 | 10 | func main() { 11 | // Initialize a session that the SDK will use to load 12 | // credentials from the shared credentials file ~/.aws/credentials 13 | // and region from the shared configuration file ~/.aws/config. 
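// session.Must panics if the session cannot be created; SharedConfigEnable makes
// the SDK honor ~/.aws/config (including the region) in addition to
// ~/.aws/credentials.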
14 | sess := session.Must(session.NewSessionWithOptions(session.Options{ 15 | SharedConfigState: session.SharedConfigEnable, 16 | })) 17 | svc := dataexchange.New(sess) 18 | 19 | dataSetList, err := svc.ListDataSets(&dataexchange.ListDataSetsInput{ 20 | Origin: aws.String("ENTITLED"), 21 | }) 22 | 23 | if err != nil { 24 | fmt.Println(err.Error()) 25 | return 26 | } 27 | 28 | for _, dataSet := range dataSetList.DataSets { 29 | fmt.Printf("%s/%s: %s\n %s\n", *dataSet.OriginDetails.ProductId, *dataSet.Id, *dataSet.Name, *dataSet.Description) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /subscribers/java/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.jar 3 | *.class 4 | target -------------------------------------------------------------------------------- /subscribers/java/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (Java) 2 | 3 | This sample retrieves a list of all of the subscriber's entitled data sets, in Java. 4 | 5 | To run the sample, install Maven and set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. 6 | 7 | ``` 8 | $ mvn compile 9 | $ mvn exec:java -Dexec.mainClass="com.amazonaws.dataexchange.App" 10 | 11 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 12 | Over 80,000 property sales in New York City in 2014 13 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 14 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 15 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 16 | Over 80,000 property sales in New York City in 2015 17 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 18 | Over 80,000 property sales in New York City in 2016 19 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 20 | Records of over 80,000 property sales transactions. 21 | ``` 22 | 23 | ### Implementation Details 24 | 25 | The project was generated with [Maven](https://maven.apache.org/guides/getting-started/maven-in-five-minutes.html). 26 | 27 | ``` 28 | $ mvn -B archetype:generate \ 29 | -DarchetypeGroupId=org.apache.maven.archetypes \ 30 | -DgroupId=com.amazonaws.dataexchange \ 31 | -DartifactId=all-entitled-datasets 32 | ``` 33 | 34 | A dependency on AWS SDK was added to [pom.xml](pom.xml). 35 | 36 | ```xml 37 | <dependencyManagement> 38 | <dependencies> 39 | <dependency> 40 | <groupId>com.amazonaws</groupId> 41 | <artifactId>aws-java-sdk-bom</artifactId> 42 | <version>1.11.693</version> 43 | <type>pom</type> 44 | <scope>import</scope> 45 | </dependency> 46 | </dependencies> 47 | </dependencyManagement> 48 | ``` 49 | 50 | Added AWS Data Exchange SDK to [pom.xml](pom.xml). 51 | 52 | ``` 53 | <dependency> 54 | <groupId>com.amazonaws</groupId> 55 | <artifactId>aws-java-sdk-dataexchange</artifactId> 56 | </dependency> 57 | ``` 58 | 59 | Upgraded compiler source and target to 1.8 in [pom.xml](pom.xml). 60 | 61 | ``` 62 | <properties> 63 | <maven.compiler.source>1.8</maven.compiler.source> 64 | <maven.compiler.target>1.8</maven.compiler.target> 65 | </properties> 66 | ``` 67 | 68 | Compile and run.
69 | 70 | ``` 71 | $ mvn compile 72 | $ mvn exec:java -Dexec.mainClass="com.amazonaws.dataexchange.App" 73 | ``` 74 | -------------------------------------------------------------------------------- /subscribers/java/all-entitled-datasets/pom.xml: -------------------------------------------------------------------------------- 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3 | <modelVersion>4.0.0</modelVersion> 4 | <groupId>org.example.basicapp</groupId> 5 | <artifactId>all-entitled-datasets</artifactId> 6 | <packaging>jar</packaging> 7 | <version>1.0-SNAPSHOT</version> 8 | <name>all-entitled-datasets</name> 9 | <url>http://maven.apache.org</url> 10 | <properties> 11 | <maven.compiler.source>1.8</maven.compiler.source> 12 | <maven.compiler.target>1.8</maven.compiler.target> 13 | </properties> 14 | <dependencyManagement> 15 | <dependencies> 16 | <dependency> 17 | <groupId>com.amazonaws</groupId> 18 | <artifactId>aws-java-sdk-bom</artifactId> 19 | <version>1.11.693</version> 20 | <type>pom</type> 21 | <scope>import</scope> 22 | </dependency> 23 | </dependencies> 24 | </dependencyManagement> 25 | <dependencies> 26 | <dependency> 27 | <groupId>junit</groupId> 28 | <artifactId>junit</artifactId> 29 | <version>4.13.1</version> 30 | <scope>test</scope> 31 | </dependency> 32 | <dependency> 33 | <groupId>com.amazonaws</groupId> 34 | <artifactId>aws-java-sdk-dataexchange</artifactId> 35 | </dependency> 36 | </dependencies> 37 | </project> 38 | -------------------------------------------------------------------------------- /subscribers/java/all-entitled-datasets/src/main/java/com/amazonaws/dataexchange/App.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.dataexchange; 2 | 3 | import com.amazonaws.services.dataexchange.*; 4 | import com.amazonaws.services.dataexchange.model.*; 5 | 6 | public class App { 7 | public static void main(String[] args) { 8 | AWSDataExchange client = AWSDataExchangeClientBuilder.defaultClient(); 9 | 10 | ListDataSetsRequest listDataSetsRequest = new ListDataSetsRequest() 11 | .withOrigin("ENTITLED"); 12 | 13 | ListDataSetsResult dataSets = client.listDataSets(listDataSetsRequest); 14 | 15 | for (DataSetEntry dataSet : dataSets.getDataSets()) { 16 | System.out.printf("%s/%s: %s\n %s\n", 17 | dataSet.getOriginDetails().getProductId(), 18 | dataSet.getId(), 19 | dataSet.getName(), 20 | dataSet.getDescription()); 21 | } 22 | 23 | System.exit(0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /subscribers/javascript/adx-for-apis-simple/.gitignore: -------------------------------------------------------------------------------- 1 | # Skip node_modules 2 | node_modules/ 3 | 4 | # Don't check in the transpiled JS files 5 | *.js -------------------------------------------------------------------------------- /subscribers/javascript/adx-for-apis-simple/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. This simple test product echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content as you explore API based data consumption. By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development Kit (SDK)][Tools] with [JavaScript / Node.JS][AWSDataExchangeSDKForJavaScript] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 4 | 5 | This will typically consist of three main stages: 6 | 1. Import relevant SDK Clients and Interfaces, establish base Client configuration, and instantiate the Client. \ 7 | _(This stage will remain consistent across all potential AWS Data Exchange for APIs use cases)_ 8 | 2. Define the relevant Provider / Product specific identities. \ 9 | _(This stage will remain consistent across all uses of a given Product)_ 10 | 3. Define the request-specific parameters based on your business need.
\ 11 | _(This stage will likely change for every request)_ 12 | 13 | ## Getting Started 14 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the ["AWS Data Exchange for APIs (Test product)"][APITestProduct] Product, and subscribe. 15 | Copy the relevant `DataSetId`, `RevisionId`, and `AssetId` from the Entitled Data page and paste them into the `productInfo` constant in the code sample (adx4apis.js) (you will likely find they match the sample contents). Next, update the `sendApiAssetCommandInput` constant based on your desired request parameters. Again, for test purposes, the provided inputs should work just fine. Finally, install the necessary dependencies (@aws-sdk/client-dataexchange) using `npm install`, transpile from TypeScript to Javascript using `tsc`, and then execute the script with `node adx4api`. 16 | 17 | To assist with finding the necessary inputs for the `productInfo` and `sendApiAssetCommandInput` constants, the Data Exchange console provides Sample CLI requests as shown below. The first 3 parameters map to the productInfo constant, and the rest map to sendApiAssetCommandInput 18 | ``` 19 | aws dataexchange send-api-asset \ 20 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 21 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 22 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 23 | --method POST \ 24 | --path "/" \ 25 | --query-string-parameters 'param1=value1,param2=value2' \ 26 | --request-headers 'header=header_value' \ 27 | --body "{\"body_param\":\"body_param_value\"}" 28 | ``` 29 | 30 | ## Authentication 31 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. For local development purposes, this will typically use credentials provided to the AWS CLI by [`aws configure`][AWSConfigure]. When running on Amazon EC2 it will typically use the [EC2 Instance Profile][IAMRolesForEC2], and for AWS Lambda it will use the [Lambda Execution Role][LambdaExecutionRole]. 32 | 33 | [APITestProduct]: https://us-east-1.console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-pgkxrurxwmp76 34 | [Tools]: https://aws.amazon.com/tools/ 35 | [AWSDataExchangeSDKForJavaScript]: https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-dataexchange/index.html 36 | [IAMRolesForEC2]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html 37 | [LambdaExecutionRole]: https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html 38 | [AWSConfigure]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html -------------------------------------------------------------------------------- /subscribers/javascript/adx-for-apis-simple/adx4api.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Please see the README.md on the GitHub AWS Data Exchange Samples repository for a more detailed overview with links to relevant AWS documentation. 3 | 4 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. This simple test product 5 | echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content 6 | as you explore API based data consumption. 
By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development 7 | Kit (SDK)][Tools] with [JavaScript / Node.JS][AWSDataExchangeSDKForJavaScript] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 8 | 9 | This will typically consist of three main stages: 10 | 1. Import relevant SDK Clients and Interfaces, establish base Client configuration, and instantiate the Client. 11 | (This stage will remain consistent across all potential AWS Data Exchange for APIs use cases) 12 | 2. Define the relevant Provider / Product specific identities) 13 | (This stage will remain consistent across all uses of a given Product. 14 | 3. Define the request-specific parameters based on your business need. 15 | (This stage will likely change for every request) 16 | 17 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the "AWS Data Exchange for APIs (Test product)" 18 | Product, and subscribe. 19 | Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the productInfo constant below 20 | (you will likely find they match the sample contents). Next, update the sendApiAssetCommandInput constant based on your desired request 21 | parameters. Again, for test purposes, the provided inputs should work just fine. Finally, install the necessary dependencies 22 | (@aws-sdk/client-dataexchange) using `npm install`, transpile from TypeScript to Javascript using `tsc`, and then execute the script with `node adx4api`. 23 | 24 | To assist with finding the necessary inputs for the productInfo and sendApiAssetCommandInput constants, the Data Exchange console provides 25 | Sample CLI requests as shown below. The first 3 parameters map to the productInfo constant, and the rest map to sendApiAssetCommandInput 26 | aws dataexchange send-api-asset \ 27 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 28 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 29 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 30 | --method POST \ 31 | --path "/" \ 32 | --query-string-parameters 'param1=value1,param2=value2' \ 33 | --request-headers 'header=header_value' \ 34 | --body "{\"body_param\":\"body_param_value\"}" 35 | 36 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. 37 | For local development purposes, this will typically use credentials provided to the AWS CLI by `aws configure` 38 | When running on Amazon EC2 it will typically use the EC2 Instance Profile, and for AWS Lambda it will use the Lambda Execution Role. 39 | */ 40 | 41 | //Import all relevant Clients and Interfaces from the @aws-sdk/client-dataexchange SDK 42 | import { DataExchangeClient, DataExchangeClientConfig, SendApiAssetCommand, SendApiAssetCommandInput, SendApiAssetCommandOutput } from "@aws-sdk/client-dataexchange"; 43 | 44 | //Populate DataExchangeClientConfig with Region and Logger requirements 45 | const dataExchangeClientConfig: DataExchangeClientConfig = { 46 | region: "us-east-1", 47 | logger: { 48 | debug: console.debug, 49 | info: console.log, 50 | warn: console.warn, 51 | error: console.error 52 | } 53 | } 54 | 55 | //Instantiate DataExchangeClient 56 | const dataExchangeClient = new DataExchangeClient(dataExchangeClientConfig); 57 | 58 | //Populate productInfo object based on SendApiAssetCommandInput interface, providing just the mandatory parameters which will be consistent across requests. 
The examples below are the AWS Data Exchange for APIs (Test product) in us-east-1 59 | const productInfo: SendApiAssetCommandInput = { 60 | DataSetId: "8d494cba5e4720e5f6072e280daf70a8", 61 | RevisionId: "32559097c7d209b02af6de5cad4385fe", 62 | AssetId: "4e94198cfdb8400793fb3f0411861960" 63 | } 64 | 65 | //Populate sendApiAssetCommandInput object based on SendApiAssetCommand interface by merging productInfo object with additional request specific parameters 66 | const sendApiAssetCommandInput: SendApiAssetCommandInput = { 67 | ...productInfo, 68 | //This can be GET, PUT, POST, etc. depending on the Provider API 69 | Method: "POST", 70 | //This depends on the Provider API and data being requested 71 | Path: "/", 72 | //These depend on the Provider API and should be provided as a JSON Object 73 | QueryStringParameters: { 74 | param1: "value1", 75 | param2: "value2" 76 | }, 77 | //These depend on the Provider API and should be provided as a JSON Object. Note that the AWS Data Exchange Test API product requires "Content-Type": "application/json" 78 | RequestHeaders: { 79 | "Content-Type": "application/json" 80 | }, 81 | //This depends on the Provider API 82 | Body: JSON.stringify({ 83 | body_param: "body_param_value" 84 | }) 85 | } 86 | 87 | //Create asynchronous function to make an ADX for APIs Subscriber Call 88 | async function makeAdxForApiSubscriberCall (sendApiAssetCommandInput: SendApiAssetCommandInput) { 89 | 90 | //Instantiate SendApiAssetCommand 91 | const sendApiAssetCommand = new SendApiAssetCommand(sendApiAssetCommandInput); 92 | 93 | //Send command using DataExchangeClient 94 | try { 95 | const sendApiAssetCommandOutput: SendApiAssetCommandOutput = await dataExchangeClient.send(sendApiAssetCommand); 96 | console.log("Output"); 97 | console.log(sendApiAssetCommandOutput); 98 | } catch (err) { 99 | //Log errors 100 | console.error("Error") 101 | console.error(err); 102 | } 103 | } 104 | 105 | //Invoke function to make ADX for APIs Subscriber Call 106 | (async () => { 107 | await makeAdxForApiSubscriberCall(sendApiAssetCommandInput); 108 | })(); -------------------------------------------------------------------------------- /subscribers/javascript/adx-for-apis-simple/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "adx-for-apis-subscriber-sample-typescript", 3 | "version": "1.0.0", 4 | "description": "Typescript sample ADX for APIs subscriber activity", 5 | "main": "adx4api.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples.git" 12 | }, 13 | "author": "", 14 | "license": "ISC", 15 | "bugs": { 16 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples/issues" 17 | }, 18 | "homepage": "https://github.com/aws-samples/aws-dataexchange-api-samples#readme", 19 | "dependencies": { 20 | "@aws-sdk/client-dataexchange": "^3.350.0" 21 | }, 22 | "devDependencies": { 23 | "typescript": "^4.7.4" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /subscribers/javascript/adx-for-apis-simple/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | 5 | /* Projects */ 6 | // "incremental": true, /* Enable incremental compilation */ 7 | // "composite": true, /* Enable constraints that 
allow a TypeScript project to be used with project references. */ 8 | // "tsBuildInfoFile": "./", /* Specify the folder for .tsbuildinfo incremental compilation files. */ 9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects */ 10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ 11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ 12 | 13 | /* Language and Environment */ 14 | "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 15 | // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 16 | // "jsx": "preserve", /* Specify what JSX code is generated. */ 17 | // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ 18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ 19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */ 20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ 21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */ 22 | // "reactNamespace": "", /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */ 23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ 24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ 25 | 26 | /* Modules */ 27 | "module": "commonjs", /* Specify what module code is generated. */ 28 | // "rootDir": "./", /* Specify the root folder within your source files. */ 29 | // "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */ 30 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ 31 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ 32 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ 33 | // "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */ 34 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */ 35 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 36 | // "resolveJsonModule": true, /* Enable importing .json files */ 37 | // "noResolve": true, /* Disallow `import`s, `require`s or ``s from expanding the number of files TypeScript should add to a project. */ 38 | 39 | /* JavaScript Support */ 40 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ 41 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ 42 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. 
*/ 43 | 44 | /* Emit */ 45 | // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ 46 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */ 47 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ 48 | // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ 49 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */ 50 | // "outDir": "./", /* Specify an output folder for all emitted files. */ 51 | // "removeComments": true, /* Disable emitting comments. */ 52 | // "noEmit": true, /* Disable emitting files from a compilation. */ 53 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ 54 | // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types */ 55 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ 56 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ 57 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 58 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ 59 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ 60 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ 61 | // "newLine": "crlf", /* Set the newline character for emitting files. */ 62 | // "stripInternal": true, /* Disable emitting declarations that have `@internal` in their JSDoc comments. */ 63 | // "noEmitHelpers": true, /* Disable generating custom helper functions like `__extends` in compiled output. */ 64 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ 65 | // "preserveConstEnums": true, /* Disable erasing `const enum` declarations in generated code. */ 66 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ 67 | // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ 68 | 69 | /* Interop Constraints */ 70 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ 71 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ 72 | "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables `allowSyntheticDefaultImports` for type compatibility. */ 73 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ 74 | "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ 75 | 76 | /* Type Checking */ 77 | "strict": true, /* Enable all strict type-checking options. */ 78 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied `any` type.. */ 79 | // "strictNullChecks": true, /* When type checking, take into account `null` and `undefined`. 
*/ 80 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ 81 | // "strictBindCallApply": true, /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */ 82 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ 83 | // "noImplicitThis": true, /* Enable error reporting when `this` is given the type `any`. */ 84 | // "useUnknownInCatchVariables": true, /* Type catch clause variables as 'unknown' instead of 'any'. */ 85 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ 86 | // "noUnusedLocals": true, /* Enable error reporting when a local variables aren't read. */ 87 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read */ 88 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ 89 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ 90 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ 91 | // "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ 92 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ 93 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type */ 94 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ 95 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ 96 | 97 | /* Completeness */ 98 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ 99 | "skipLibCheck": true /* Skip type checking all .d.ts files. */ 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | node_modules/ 3 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true 2 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (JavaScript) 2 | 3 | This sample retrieves a list of all subscriber's entitled data sets, in JavaScript, using AWS SDK v3. 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and optionally `AWS_SESSION_TOKEN`. 6 | 7 | ``` 8 | $ npm install 9 | 10 | $ npm run-script build 11 | 12 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 13 | Over 80,000 property sales in New York City in 2014 14 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 15 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 
16 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 17 | Over 80,000 property sales in New York City in 2015 18 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 19 | Over 80,000 property sales in New York City in 2016 20 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 21 | Records of over 80,000 property sales transactions. 22 | ``` 23 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/index.ts: -------------------------------------------------------------------------------- 1 | import { DataExchange, DataSetEntry } from '@aws-sdk/client-dataexchange' 2 | 3 | const dataexchange = new DataExchange({ 4 | region: process.env.AWS_REGION || 'us-east-1', 5 | credentials: { 6 | accessKeyId: process.env.AWS_ACCESS_KEY_ID, 7 | secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, 8 | sessionToken: process.env.AWS_SESSION_TOKEN 9 | } 10 | }); 11 | 12 | void async function () { 13 | const entitledDataSets = await dataexchange.listDataSets({ Origin: 'ENTITLED' }); 14 | 15 | entitledDataSets.DataSets.forEach((dataSet: DataSetEntry) => { 16 | console.log(`${dataSet.OriginDetails.ProductId}/${dataSet.Id}: ${dataSet.Name}\n ${dataSet.Description}`); 17 | }); 18 | }(); 19 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "all-entitled-datasets", 3 | "version": "1.0.0", 4 | "description": "This sample retrieves a list of all subscriber's entitled data sets.", 5 | "main": "dist/index.js", 6 | "scripts": { 7 | "build": "ts-node index.ts" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples.git" 12 | }, 13 | "author": "", 14 | "license": "ISC", 15 | "bugs": { 16 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples/issues" 17 | }, 18 | "homepage": "https://github.com/aws-samples/aws-dataexchange-api-samples#readme", 19 | "dependencies": { 20 | "@aws-sdk/client-dataexchange": "^3.632.0", 21 | "@types/node": "^12.12.2", 22 | "ts-node": "^9.1.1", 23 | "typescript": "^4.2.4" 24 | }, 25 | "engines": { 26 | "node": ">=10.0" 27 | }, 28 | "engineStrict": true 29 | } 30 | -------------------------------------------------------------------------------- /subscribers/javascript/all-entitled-datasets/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "outDir" : "dist", 5 | "sourceMap": true 6 | }, 7 | "include": [ 8 | "*.ts" 9 | ] 10 | } -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/.gitignore: -------------------------------------------------------------------------------- 1 | cdk.out 2 | dist 3 | node_modules 4 | 5 | -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/README.md: -------------------------------------------------------------------------------- 1 | # [Deprecated] Auto Export To S3 (JavaScript) 2 | 3 | > This sample is deprecated in favor of using the official AWS Data Exchange feature for auto exporting S3 revisions: https://aws.amazon.com/about-aws/whats-new/2021/09/aws-data-exchange-export-third-party-data-updates/ 4 | 5 | 
This sample shows how to set up an AWS Lambda function which will automatically export all newly published revisions to S3. All infrastructure is setup using the [AWS CDK](https://docs.aws.amazon.com/cdk/latest/guide/home.html). 6 | 7 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and optionally `AWS_SESSION_TOKEN`. 8 | 9 | 10 | ### First Time CDK User 11 | 12 | ``` 13 | $ npm install -g cdk 14 | $ cdk bootstrap 15 | ``` 16 | 17 | ### Build and Deploy 18 | ``` 19 | $ npm run-script deploy 20 | ``` 21 | 22 | The CloudFormation will create the following resources: 23 | 24 | 1. S3 Bucket 25 | 1. Lambda Function (with necessary IAM permissions) 26 | 1. CloudWatch Rule 27 | 28 | Each time a new Revision is published to a Data Set to which your account is subscribed, the new Revision(s) will be automatically exported to S3 as a response to the Amazon EventBridge Event sent by AWS Data Exchange. -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "node dist/cfn/lambda.js" 3 | } -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/cfn/lambda.ts: -------------------------------------------------------------------------------- 1 | import * as events from '@aws-cdk/aws-events'; 2 | import * as targets from '@aws-cdk/aws-events-targets'; 3 | import * as lambda from '@aws-cdk/aws-lambda'; 4 | import * as iam from '@aws-cdk/aws-iam'; 5 | import * as cdk from '@aws-cdk/core'; 6 | import * as s3 from '@aws-cdk/aws-s3'; 7 | 8 | export class AutoExportToS3Stack extends cdk.Stack { 9 | constructor(app: cdk.App, id: string) { 10 | super(app, id); 11 | 12 | const s3Bucket = new s3.Bucket(this, 'DataExchangeAssetsBucket', { 13 | encryption: s3.BucketEncryption.S3_MANAGED 14 | }); 15 | 16 | const lambdaFunction = new lambda.Function(this, 'AutoExportToS3Lambda', { 17 | code: new lambda.AssetCode('dist/lambda'), 18 | handler: 'exportToS3.handler', 19 | timeout: cdk.Duration.minutes(15), 20 | runtime: lambda.Runtime.NODEJS_12_X, 21 | environment: { 22 | S3_BUCKET: s3Bucket.bucketName 23 | } 24 | }); 25 | 26 | lambdaFunction.role.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('AWSDataExchangeSubscriberFullAccess')); 27 | lambdaFunction.addToRolePolicy(new iam.PolicyStatement({ 28 | actions: [ 's3:PutObject', 's3:PutObjectAcl' ], 29 | resources: [ cdk.Fn.join('', [ s3Bucket.bucketArn, '/*' ]) ] 30 | })); 31 | 32 | new events.Rule(this, 'DataExchangeRule', { 33 | description: 'Each time an ENTITLED Data Set is updated with a new Revision, this Rule will be triggered.', 34 | eventPattern: { 35 | source: [ 'aws.dataexchange' ], 36 | detailType: [ 'Revision Published To Data Set' ] 37 | }, 38 | targets: [ new targets.LambdaFunction(lambdaFunction) ] 39 | }); 40 | } 41 | } 42 | 43 | const app = new cdk.App(); 44 | new AutoExportToS3Stack(app, 'AutoExportToS3Stack'); 45 | app.synth(); -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/lambda/exportToS3.ts: -------------------------------------------------------------------------------- 1 | import { Context, Callback, Handler, ScheduledEvent } from 'aws-lambda'; 2 | import { DataExchange } from 'aws-sdk'; 3 | import Logger from 'bunyan'; 4 | 5 | // 
https://aws.amazon.com/blogs/big-data/find-and-acquire-new-data-sets-and-retrieve-new-updates-automatically-using-aws-data-exchange/ 6 | interface IDataExchangeDetailType { 7 | RevisionIds: string[]; 8 | } 9 | 10 | const logger = Logger.createLogger({ 11 | name: 'AutoExportToS3Lamdba' 12 | }); 13 | 14 | export const handler: Handler = async function (event: ScheduledEvent, context: Context, callback: Callback) { 15 | logger.info({ event }, 'Event passed to AWS Lambda.'); 16 | 17 | const s3Bucket = process.env.S3_BUCKET; 18 | const dataExchangeClient = new DataExchange({ 19 | logger: console 20 | }); 21 | 22 | // The Resources block contains a single entry which is the DataSetId which contains the RevisionIds. 23 | const dataSetId = event.resources[0]; 24 | 25 | // Export each new Revision to S3. 26 | for (const revisionId of (event.detail as IDataExchangeDetailType).RevisionIds) { 27 | const job = await dataExchangeClient.createJob({ 28 | Type: 'EXPORT_REVISIONS_TO_S3', 29 | Details: { 30 | ExportRevisionsToS3: { 31 | DataSetId: dataSetId, 32 | RevisionDestinations: [ 33 | { 34 | RevisionId: revisionId, 35 | Bucket: s3Bucket, 36 | KeyPattern: `${dataSetId}/\${Revision.Id}/\${Asset.Name}` 37 | } 38 | ] 39 | } 40 | } 41 | }).promise(); 42 | 43 | await dataExchangeClient.startJob({ JobId: job.Id }).promise(); 44 | 45 | const completedJob = await waitForJobCompletion(job.Id, dataExchangeClient); 46 | logger.info({ completedJob }); 47 | } 48 | }; 49 | 50 | async function waitForJobCompletion(jobId: string, dataExchangeClient: DataExchange): Promise { 51 | let job: DataExchange.GetJobResponse; 52 | 53 | do { 54 | job = await dataExchangeClient.getJob({ JobId: jobId }).promise(); 55 | 56 | await sleep(5000); 57 | } while (job.State === 'WAITING' || job.State === 'IN_PROGRESS'); 58 | 59 | if (job.State === 'ERROR') { 60 | logger.error({ erroredJob: job }, 'Job encountered an error.'); 61 | } 62 | 63 | return job; 64 | } 65 | 66 | function sleep(ms: number): Promise { 67 | return new Promise(resolve => setTimeout(resolve, ms)); 68 | } 69 | -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "auto-export-to-s3", 3 | "version": "2.0.0", 4 | "description": "Auto export new AWS Data Exchange Assets to an Amazon S3 Bucket.", 5 | "scripts": { 6 | "clean": "rm -r cdk.out/ dist/", 7 | "build": "tsc", 8 | "packageDependencies": "cp package.json dist/lambda/ && cd dist/lambda/ && npm install --only=prod && cd ../.. 
&& rm dist/lambda/package.json && rm dist/lambda/package-lock.json", 9 | "deploy": "npm run build && npm run packageDependencies && cdk deploy" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples.git" 14 | }, 15 | "author": "Michael Melchione", 16 | "license": "MIT", 17 | "bugs": { 18 | "url": "https://github.com/aws-samples/aws-dataexchange-api-samples/issues" 19 | }, 20 | "homepage": "https://github.com/aws-samples/aws-dataexchange-api-samples#readme", 21 | "devDependencies": { 22 | "typescript": "~4.1.3", 23 | "@types/node": "~14.14.25", 24 | "@types/aws-lambda": "~8.10.71", 25 | "@aws-cdk/aws-lambda": "~1.88.0", 26 | "@aws-cdk/aws-events": "~1.88.0", 27 | "@aws-cdk/aws-events-targets": "~1.88.0", 28 | "@aws-cdk/aws-s3": "~1.88.0", 29 | "@aws-cdk/aws-iam": "~1.88.0", 30 | "@aws-cdk/core": "~1.88.0", 31 | "@types/bunyan": "~1.8.6" 32 | }, 33 | "dependencies": { 34 | "aws-sdk": "~2.1354.0", 35 | "bunyan": "~1.8.15" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /subscribers/javascript/auto-export-to-s3/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "esModuleInterop": true, 5 | "target": "es6", 6 | "moduleResolution": "node", 7 | "sourceMap": true, 8 | "outDir": "dist" 9 | }, 10 | "lib": ["es2015"] 11 | } 12 | -------------------------------------------------------------------------------- /subscribers/php/all-entitled-datasets/.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | -------------------------------------------------------------------------------- /subscribers/php/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (PHP) 2 | 3 | This sample retrieves a list of all subscriber's entitled data sets. 4 | 5 | To run the sample, install the [AWS SDK for PHP](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/welcome.html) and install dependencies via [Composer](https://getcomposer.org/doc/00-intro.md). 6 | 7 | ``` 8 | $ composer install 9 | ``` 10 | 11 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` and `AWS_REGION`. 12 | 13 | ``` 14 | $ php all-entitled-datasets.php 15 | 16 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 17 | Over 80,000 property sales in New York City in 2014 18 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 19 | Over 80,000 property sales in New York City in 2015 20 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 21 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 22 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 23 | Over 80,000 property sales in New York City in 2016 24 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 25 | Records of over 80,000 property sales transactions. 
26 | ``` 27 | -------------------------------------------------------------------------------- /subscribers/php/all-entitled-datasets/all-entitled-datasets.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | require 'vendor/autoload.php'; 4 | 5 | $sharedConfig = [ 6 | 'region' => 'us-east-1', 7 | 'version' => 'latest' 8 | ]; 9 | 10 | $sdk = new Aws\Sdk($sharedConfig); 11 | 12 | $dx = $sdk->createDataExchange(); 13 | 14 | $result = $dx->listDataSets(['Origin' => 'ENTITLED']); 15 | 16 | foreach ($result['DataSets'] as $data_set) { 17 | echo $data_set['OriginDetails']['ProductId'] . '/' . $data_set['Name'] . "\n" . 18 | ' ' . $data_set['Description'] . "\n"; 19 | } 20 | 21 | ?> -------------------------------------------------------------------------------- /subscribers/php/all-entitled-datasets/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "require": { 3 | "aws/aws-sdk-php": "^3.133" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /subscribers/python/adx-for-apis-simple/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. This simple test product echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content as you explore API based data consumption. By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development Kit (SDK)][Tools] with [Python][AWSDataExchangeSDKForPython] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 4 | 5 | This will typically consist of three main stages: 6 | 1. Import relevant SDK Clients, establish base Client configuration, and instantiate the Client. \ 7 | _(This stage will remain consistent across all potential AWS Data Exchange for APIs use cases)_ 8 | 2. Define the relevant Provider / Product specific identities. \ 9 | _(This stage will remain consistent across all uses of a given Product)_ 10 | 3. Define the request-specific parameters based on your business need. \ 11 | _(This stage will likely change for every request)_ 12 | 13 | ## Getting Started 14 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the ["AWS Data Exchange for APIs (Test product)"][APITestProduct] Product, and subscribe. 15 | Copy the relevant `DataSetId`, `RevisionId`, and `AssetId` from the Entitled Data page and paste them into the `data_set_id`, `revision_id` and `asset_id` variables in the code sample (adx4api.py) (you will likely find they match the sample contents). Next, update the `send_api_asset` call parameters based on your desired request parameters. Again, for test purposes, the provided inputs should work just fine. Finally, install the necessary dependencies (boto3) using `python3 -m pip install -r requirements.txt`, and then execute the script with `./adx4api.py`. 16 | 17 | To assist with finding the necessary inputs for the `product information` and `additional send_api_asset api call parameters`, the Data Exchange console provides Sample CLI requests as shown below. The first 3 parameters map to the product information, and the rest map to the send_api_asset API call.
18 | 19 | ``` 20 | aws dataexchange send-api-asset \ 21 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 22 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 23 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 24 | --method POST \ 25 | --path "/" \ 26 | --query-string-parameters 'param1=value1,param2=value2' \ 27 | --request-headers 'header=header_value' \ 28 | --body "{\"body_param\":\"body_param_value\"}" 29 | ``` 30 | 31 | ## Authentication 32 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. For local development purposes, this will typically use credentials provided to the AWS CLI by [`aws configure`][AWSConfigure]. When running on Amazon EC2 it will typically use the [EC2 Instance Profile][IAMRolesForEC2], and for AWS Lambda it will use the [Lambda Execution Role][LambdaExecutionRole]. 33 | 34 | 35 | ## Setup 36 | 37 | Install the requirements, preferably in a virtual environment. 38 | 39 | ```bash 40 | $ python3 -m pip install -r requirements.txt 41 | ``` 42 | 43 | ## Execution 44 | 45 | Execute the script. 46 | 47 | ```bash 48 | $ ./adx4api.py 49 | ``` 50 | 51 | [APITestProduct]: https://us-east-1.console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-pgkxrurxwmp76 52 | [Tools]: https://aws.amazon.com/tools/ 53 | [AWSDataExchangeSDKForPython]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dataexchange.html#DataExchange.Client.send_api_asset 54 | [IAMRolesForEC2]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html 55 | [LambdaExecutionRole]: https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html 56 | [AWSConfigure]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html 57 | -------------------------------------------------------------------------------- /subscribers/python/adx-for-apis-simple/adx4api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Please see the README.md on the GitHub AWS Data Exchange Samples repository for a more detailed overview with links to relevant AWS documentation. 5 | 6 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. This simple test product 7 | echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content 8 | as you explore API based data consumption. By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development 9 | Kit (SDK)][Tools] with [Python][AWSDataExchangeSDKForPython] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 10 | 11 | This will typically consist of three main stages: 12 | 1. Import relevant SDK Clients and Interfaces, establish base Client configuration, and instantiate the Client. 13 | (This stage will remain consistent across all potential AWS Data Exchange for APIs use cases) 14 | 2. Define the relevant Provider / Product specific identities) 15 | (This stage will remain consistent across all uses of a given Product. 16 | 3. Define the request-specific parameters based on your business need. 17 | (This stage will likely change for every request) 18 | 19 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the "AWS Data Exchange for APIs (Test product)" 20 | Product, and subscribe. 
21 | Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the DATA_SET_ID, REVISION_ID, and ASSET_ID variables below 22 | (you will likely find they match the sample contents). Next, update the additional send_api_asset call parameters based on your desired request 23 | parameters. Again, for test purposes, the provided inputs should work just fine. Finally, install the necessary dependency 24 | (boto3) using `python3 -m pip install -r requirements.txt`, and then execute the script with `./adx4api.py`. 25 | 26 | To assist with finding the necessary inputs for the product info variables and the additional send_api_asset call parameters, the Data Exchange console provides 27 | Sample CLI requests as shown below. The first 3 parameters map to the product info variables, and the rest map to the send_api_asset call parameters: 28 | aws dataexchange send-api-asset \ 29 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 30 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 31 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 32 | --method POST \ 33 | --path "/" \ 34 | --query-string-parameters 'param1=value1,param2=value2' \ 35 | --request-headers 'header=header_value' \ 36 | --body "{\"body_param\":\"body_param_value\"}" 37 | 38 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. 39 | For local development purposes, this will typically use credentials provided to the AWS CLI by `aws configure` 40 | When running on Amazon EC2 it will typically use the EC2 Instance Profile, and for AWS Lambda it will use the Lambda Execution Role. 41 | """ 42 | 43 | import json 44 | import boto3 45 | 46 | # Instantiate DataExchange client for us-east-1 region 47 | CLIENT = boto3.client('dataexchange', region_name = 'us-east-1') 48 | 49 | 50 | # product info from entitled products, this uses AWS Data Exchange API sample product from us-east-1 region 51 | DATA_SET_ID = '8d494cba5e4720e5f6072e280daf70a8' 52 | REVISION_ID = '32559097c7d209b02af6de5cad4385fe' 53 | ASSET_ID = '4e94198cfdb8400793fb3f0411861960' 54 | 55 | # Additional parameters for the send_api_asset call 56 | BODY = json.dumps({'body_param': 'body_param_value'}) 57 | METHOD = 'POST' 58 | PATH = '/' 59 | QUERY_STRING_PARAMETERS = {'param1': 'value1', 'param2': 'value2'} 60 | 61 | 62 | response = CLIENT.send_api_asset( 63 | DataSetId=DATA_SET_ID, 64 | RevisionId=REVISION_ID, 65 | AssetId=ASSET_ID, 66 | Method=METHOD, 67 | Path=PATH, 68 | Body=BODY, 69 | QueryStringParameters=QUERY_STRING_PARAMETERS 70 | ) 71 | 72 | print('Response Headers:') 73 | for header in response['ResponseHeaders']: 74 | value = response['ResponseHeaders'][header] 75 | print(f' {header}: {value}') 76 | 77 | print() 78 | print('Response Body:') 79 | print( response['Body'] ) 80 | -------------------------------------------------------------------------------- /subscribers/python/adx-for-apis-simple/pylintrc: -------------------------------------------------------------------------------- 1 | [MAIN] 2 | max-line-length=160 -------------------------------------------------------------------------------- /subscribers/python/adx-for-apis-simple/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.24.75 2 | -------------------------------------------------------------------------------- /subscribers/python/download-entitled-assets/README.md: -------------------------------------------------------------------------------- 1 | # 
Download all Entitled Assets (Python) 2 | 3 | This example will automatically download all assets to which you are entitled to your local machine, in Python. 4 | 5 | Downloading requires temporarily staging the files in S3. You can provide a bucket to stage the assets, in which case the assets will remain in the bucket after the script executes. If no S3 bucket is provided, this script will create a temporary S3 bucket in your account, and will delete 6 | this bucket and all exported assets after the script completes. 7 | 8 | ### Setup 9 | 10 | Install the requirements, preferably in a virtual environment. 11 | 12 | ```bash 13 | $ pip install -r requirements.txt 14 | ``` 15 | 16 | Set AWS access key and secret. 17 | 18 | ``` 19 | $ export AWS_ACCESS_KEY_ID= 20 | $ export AWS_SECRET_ACCESS_KEY= 21 | ``` 22 | 23 | The following policies are required for this AWS user. 24 | 25 | * AmazonS3FullAccess 26 | * AWSDataExchangeFullAccess 27 | 28 | ### Execution 29 | 30 | Execute the script, optionally providing an S3 bucket to stage your downloaded assets. 31 | 32 | ```bash 33 | $ ./download-entitled-assets.py [--s3-bucket=] 34 | ``` 35 | -------------------------------------------------------------------------------- /subscribers/python/download-entitled-assets/download-entitled-assets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import boto3 4 | import os 5 | import re 6 | import time 7 | import click 8 | import uuid 9 | 10 | 11 | dx = boto3.client('dataexchange', region_name='us-east-1') 12 | s3 = boto3.client('s3') 13 | 14 | 15 | def get_all_revisions(data_set_id): 16 | 17 | revisions = [] 18 | res = dx.list_data_set_revisions(DataSetId=data_set_id) 19 | next_token = res.get('NextToken') 20 | 21 | revisions += res.get('Revisions') 22 | while next_token: 23 | res = dx.list_data_set_revisions(DataSetId=data_set_id, 24 | NextToken=next_token) 25 | revisions += res.get('Revisions') 26 | next_token = res.get('NextToken') 27 | 28 | return revisions 29 | 30 | 31 | def get_all_assets(data_set_id, revision_id): 32 | assets = [] 33 | res = dx.list_revision_assets(DataSetId=data_set_id, 34 | RevisionId=revision_id) 35 | next_token = res.get('NextToken') 36 | 37 | assets += res.get('Assets') 38 | while next_token: 39 | res = dx.list_revision_assets(DataSetId=data_set_id, 40 | RevisionId=revision_id, 41 | NextToken=next_token) 42 | assets += res.get('Assets') 43 | next_token = res.get('NextToken') 44 | 45 | return assets 46 | 47 | 48 | def get_entitled_data_sets(): 49 | data_sets = [] 50 | res = dx.list_data_sets(Origin='ENTITLED') 51 | next_token = res.get('NextToken') 52 | 53 | data_sets += res.get('DataSets') 54 | while next_token: 55 | res = dx.list_data_sets(Origin='ENTITLED', 56 | NextToken=next_token) 57 | data_sets += res.get('DataSets') 58 | next_token = res.get('NextToken') 59 | 60 | return data_sets 61 | 62 | 63 | def export_assets(assets, bucket): 64 | 65 | asset_destinations = [] 66 | 67 | for asset in assets: 68 | asset_destinations.append({ 69 | "AssetId": asset.get('Id'), 70 | "Bucket": bucket, 71 | "Key": asset.get('Name') 72 | }) 73 | 74 | job = dx.create_job(Type='EXPORT_ASSETS_TO_S3', Details={ 75 | "ExportAssetsToS3": { 76 | "RevisionId": asset.get("RevisionId"), "DataSetId": asset.get("DataSetId"), 77 | "AssetDestinations": asset_destinations 78 | } 79 | }) 80 | 81 | job_id = job.get('Id') 82 | dx.start_job(JobId=job_id) 83 | 84 | while True: 85 | job = dx.get_job(JobId=job_id) 86 | 87 | if job.get('State') == 
'COMPLETED': 88 | break 89 | elif job.get('State') == 'ERROR': 90 | raise Exception("Job {} failed to complete - {}".format( 91 | job_id, job.get('Errors')[0].get('Message')) 92 | ) 93 | 94 | time.sleep(1) 95 | 96 | 97 | def to_url(s): 98 | s = re.sub(r"[^\w\s]", '', s) 99 | s = re.sub(r"\s+", '-', s) 100 | 101 | return s 102 | 103 | 104 | def download_assets(assets, bucket, asset_dir): 105 | for asset in assets: 106 | asset_name = asset.get('Name') 107 | sub_dir = os.path.dirname(asset_name) 108 | full_dir = os.path.join(asset_dir, sub_dir) 109 | 110 | if not os.path.exists(full_dir): 111 | os.makedirs(full_dir) 112 | 113 | asset_file = os.path.join(full_dir, os.path.basename(asset_name)) 114 | 115 | s3.download_file(bucket, asset_name, asset_file) 116 | 117 | print("Downloaded file {}".format(asset_file)) 118 | 119 | 120 | def make_s3_staging_bucket(): 121 | bucket_name = str(uuid.uuid4()) 122 | s3.create_bucket(Bucket=bucket_name) 123 | return bucket_name 124 | 125 | 126 | def remove_s3_bucket(bucket_name): 127 | s3_resource = boto3.resource('s3') 128 | bucket = s3_resource.Bucket(bucket_name) 129 | bucket.objects.all().delete() 130 | bucket.delete() 131 | 132 | 133 | @click.command() 134 | @click.option('--s3-bucket', '-s') 135 | def main(s3_bucket): 136 | temp_bucket = None 137 | if not s3_bucket: 138 | print("No s3 bucket provided, creating temporary staging bucket") 139 | temp_bucket = make_s3_staging_bucket() 140 | print("Created temporary bucket {}".format(temp_bucket)) 141 | 142 | try: 143 | data_sets = get_entitled_data_sets() 144 | 145 | staging_bucket = s3_bucket or temp_bucket 146 | 147 | for ds in data_sets: 148 | print("Getting all Assets for Data set ### {} ###".format(ds.get('Name'))) 149 | 150 | revisions = get_all_revisions(ds.get('Id')) 151 | for rev in revisions: 152 | assets = get_all_assets(ds.get('Id'), rev.get('Id')) 153 | 154 | destination_dir = os.path.join(to_url(ds.get('Name')), rev.get('Id')) 155 | 156 | export_assets(assets, staging_bucket) 157 | download_assets(assets, staging_bucket, destination_dir) 158 | 159 | print("---") 160 | finally: 161 | if temp_bucket: 162 | print("Removing temporary bucket {}".format(temp_bucket)) 163 | remove_s3_bucket(temp_bucket) 164 | 165 | 166 | if __name__ == '__main__': 167 | main() -------------------------------------------------------------------------------- /subscribers/python/download-entitled-assets/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.10.23 2 | botocore==1.13.23 3 | Click==7.0 4 | -------------------------------------------------------------------------------- /subscribers/python/export-data-sets/README.md: -------------------------------------------------------------------------------- 1 | # Export entitled data set assets (Python) 2 | 3 | This example accepts dataset_id(s), bucket, and region, and exports all revisions within the specified data-set-ids into an S3 bucket. 4 | 5 | ### Setup 6 | 7 | Install the requirements, preferably in a virtual environment. 8 | 9 | ```bash 10 | $ pip install -r requirements.txt 11 | ``` 12 | 13 | Set AWS access key and secret. 14 | 15 | ``` 16 | $ export AWS_ACCESS_KEY_ID= 17 | $ export AWS_SECRET_ACCESS_KEY= 18 | ``` 19 | 20 | The following policies are required for this AWS user. 21 | 22 | * AmazonS3FullAccess 23 | * AWSDataExchangeSubscriberFullAccess 24 | 25 | ### Execution 26 | 27 | You can run the following command to execute the script. 
Note that bucket, region, and data-set-ids are mandatory parameters, and the region specified must match the region in which the data sets and the bucket are hosted. If key-pattern is not specified, it defaults to *${Revision.Id}/${Asset.Name}*. 28 | 29 | ```bash 30 | $ ./export-data-sets.py --bucket 'bucket-name' --data-set-ids 'comma-separated-data-set-id(s)' --region 'region-name' --key-pattern 'key-pattern' 31 | ``` 32 | Note - If the script fails, please check whether the data set and the bucket are hosted in the same region. -------------------------------------------------------------------------------- /subscribers/python/export-data-sets/export-data-sets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import boto3 4 | import os 5 | import re 6 | import time 7 | import click 8 | import uuid 9 | import json 10 | import botocore 11 | 12 | dx = boto3.client('dataexchange') 13 | s3 = boto3.client('s3') 14 | 15 | #This function returns revision_ids corresponding to the data-set-id specified. 16 | def get_revisions(data_set_id): 17 | 18 | #Paginate and extract all revisions corresponding to the data-set specified. 19 | revisions = [] 20 | #print('Extracting revision-ids for data set',data_set_id) 21 | try: 22 | res = dx.list_data_set_revisions(DataSetId=data_set_id) 23 | next_token = res.get('NextToken') 24 | revisions += res.get('Revisions') 25 | while next_token: 26 | res = dx.list_data_set_revisions(DataSetId=data_set_id, 27 | NextToken=next_token) 28 | revisions += res.get('Revisions') 29 | next_token = res.get('NextToken') 30 | except dx.exceptions.ResourceNotFoundException as error: 31 | print('The data set does not belong to the region specified.') 32 | exit() 33 | return revisions 34 | 35 | 36 | #This function exports assets corresponding to the revisions specified into an S3 bucket 37 | def export_revisions(data_set_id,revisions,bucket,key_pattern): 38 | 39 | for i in range(0, len(revisions), 5): 40 | job_ids=[] 41 | 42 | #Trigger 5 concurrent export jobs at a time 43 | for revision in revisions[i:i + 5]: 44 | create_job_response = dx.create_job( 45 | Details={ 46 | 'ExportRevisionsToS3': { 47 | "DataSetId": data_set_id, 48 | 'RevisionDestinations':[ {"RevisionId": revision['Id'], "Bucket": bucket, "KeyPattern": key_pattern}] 49 | }},Type='EXPORT_REVISIONS_TO_S3' 50 | ) 51 | 52 | job_id=create_job_response['Id'] 53 | job_ids.append(job_id) 54 | 55 | #Initiate the job 56 | print("=> Starting Job: ",job_id, "for revision: ",revision['Id']) 57 | dx.start_job(JobId=job_id) 58 | 59 | #Wait for all export jobs in this batch to finish 60 | for job in job_ids: 61 | max_time = time.time() + 60*60 # 1 hour 62 | #print(job) 63 | while time.time() < max_time: 64 | response = dx.get_job(JobId=job) 65 | status = response['State'] 66 | print('STATUS: ',job,'get_job_status'+": {}".format(status)) 67 | if status == "COMPLETED": 68 | break 69 | elif status == "ERROR": 70 | print(response) 71 | print("Export failed") 72 | exit() 73 | time.sleep(5) 74 | time.sleep(15) 75 | 76 | 77 | # This function accepts data-set-ids, a region, and an optional key-pattern, and then exports the data into the specified S3 bucket. The region of the S3 bucket and the data sets must be the same.
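# A minimal example invocation follows (the bucket name and data set ID shown are hypothetical,
# included only to illustrate the expected arguments):
#
#   ./export-data-sets.py --bucket my-adx-exports --data-set-ids aaaa1111bbbb2222cccc3333dddd4444 --region us-east-1
#
# With the default key pattern "${Revision.Id}/${Asset.Name}", each exported asset is written
# under a per-revision prefix, e.g. s3://my-adx-exports/<revision-id>/<asset-name>.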
78 | @click.command() 79 | @click.option('--bucket', '-s') 80 | @click.option('--data-set-ids', '-s') 81 | @click.option('--region', '-s') 82 | @click.option('--key-pattern', '-s') 83 | def main(bucket,data_set_ids,region,key_pattern): 84 | global dx,s3 85 | if not bucket: 86 | print("No s3 bucket provided") 87 | elif not data_set_ids: 88 | print("No data_set_ids provided") 89 | if not region: 90 | print("No region provided") 91 | else: 92 | #Override region for connections. 93 | if not key_pattern: 94 | key_pattern= "${Revision.Id}/${Asset.Name}" 95 | dx = boto3.client('dataexchange', region_name=region) 96 | s3 = boto3.client('s3', region_name=region) 97 | print(s3.get_bucket_location(Bucket=bucket)) 98 | location = s3.get_bucket_location(Bucket=bucket)['LocationConstraint'] 99 | if location == None: 100 | location='us-east-1' 101 | 102 | if region != location.replace("'",""): 103 | print ('Data set region does not match bucket\'s region. Cross region exports incur additional charges and cross-region exports over 100GB might fail.') 104 | if input('Do You Want To Continue? (y/n) ') != 'y': 105 | print('Cancelling export.') 106 | exit() 107 | 108 | #loop through data_set_ids and extract 109 | for data_set_id in data_set_ids.split(","): 110 | revisions = get_revisions(data_set_id) 111 | print("Initiating export for data set {} ".format(data_set_id)) 112 | export_revisions(data_set_id,revisions,bucket,key_pattern) 113 | print("Export for data set {} is complete".format(data_set_id)) 114 | print("Export complete.") 115 | 116 | 117 | if __name__ == '__main__': 118 | main() -------------------------------------------------------------------------------- /subscribers/python/export-data-sets/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.10.23 2 | botocore==1.13.23 3 | Click==7.0 4 | -------------------------------------------------------------------------------- /subscribers/python/pandas-describe-csv/README.md: -------------------------------------------------------------------------------- 1 | # Import CSV asset to pandas 2 | 3 | This example imports a CSV asset from Data Exchange into a pandas Data Frame object and `describe()` the result. 4 | 5 | ### Setup 6 | 7 | Install the requirements, preferably in a virtual environment. 8 | 9 | ```bash 10 | $ pip install -r requirements.txt 11 | ``` 12 | 13 | Set AWS access key and secret. 14 | 15 | ``` 16 | $ export AWS_ACCESS_KEY_ID= 17 | $ export AWS_SECRET_ACCESS_KEY= 18 | ``` 19 | 20 | The following policies are required for this AWS user. 21 | 22 | * AmazonS3FullAccess 23 | * AWSDataExchangeFullAccess 24 | 25 | Subscribe to a product on [AWS Data Exchange](https://aws.amazon.com/data-exchange), and note the Arn for the CSV asset you would like to test against. 26 | 27 | ### Execution 28 | 29 | This script creates a temporary S3 Bucket in your account to export the assets, and a temporary directory to stage the file locally. 30 | 31 | ```bash 32 | $ ./pandas-describe-csv.py 33 | ``` 34 | 35 | Sample output using [Rearc Tax Revenue (% of GDP) from World Bank Open Data](https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-yfrvk7kf66aiy). 36 | 37 | ``` 38 | $ ./pandas-describe-csv.py arn:aws:dataexchange:us-east-1::data-sets/5c8f9ac07883d81d8f25e2b9dd28efce/revisions/40c042c6b24286f1acf36b49e5748b36/assets/770435e0fd1aa970450b1b7c2e6a39f9 39 | 40 | 1972 1973 1974 1975 1976 1977 1978 ... 
2011 2012 2013 2014 2015 2016 2017 41 | count 41.000000 50.000000 53.000000 50.000000 51.000000 52.000000 53.000000 ... 168.000000 155.000000 157.000000 156.000000 153.000000 146.000000 130.000000 42 | mean 17.595742 16.770584 16.003546 16.315434 16.817749 17.245061 17.879250 ... 16.783580 17.166849 16.784489 17.162941 17.002521 17.053722 17.775034 43 | std 8.923219 8.116698 6.033629 5.317894 5.675549 6.132574 8.739127 ... 6.262297 6.412270 6.428430 6.374452 6.236064 6.240035 5.857815 44 | min 7.610619 7.091172 5.417791 7.521319 7.562059 4.615802 7.597964 ... 0.321414 0.363786 0.370451 0.355723 0.057734 0.043495 0.066984 45 | 25% 12.445223 11.536664 11.810243 12.343971 12.725799 12.954639 12.651562 ... 13.132882 13.438136 13.008075 12.684953 12.723964 13.013613 13.646999 46 | 50% 14.872564 14.804852 15.021760 16.429262 16.552555 16.694574 16.511790 ... 16.155646 16.061603 15.668958 16.084710 16.124201 15.847322 17.322809 47 | 75% 21.171189 18.763604 18.397341 20.159210 19.795274 20.690760 21.466918 ... 20.248498 20.978581 21.601282 22.033340 21.646466 21.839676 22.202239 48 | max 58.950073 56.281979 32.677682 30.394147 33.768480 35.126715 65.423553 ... 37.562987 36.937839 36.376968 36.500291 33.921623 37.752914 33.323447 49 | 50 | [8 rows x 46 columns] 51 | ``` 52 | -------------------------------------------------------------------------------- /subscribers/python/pandas-describe-csv/pandas-describe-csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import click 4 | import boto3 5 | import os 6 | import re 7 | import time 8 | import tempfile 9 | import uuid 10 | import pandas 11 | 12 | 13 | dx = boto3.client('dataexchange', region_name='us-east-1') 14 | s3 = boto3.client('s3') 15 | 16 | 17 | class TemporaryS3Bucket(object): 18 | def __init__(self): 19 | bucket_name = str(uuid.uuid4()) 20 | 21 | self.bucket_name = bucket_name 22 | self.s3 = boto3.resource('s3') 23 | 24 | self.s3.create_bucket(Bucket=bucket_name) 25 | 26 | def __enter__(self): 27 | return self.bucket_name 28 | 29 | def __exit__(self, type, value, traceback): 30 | bucket = self.s3.Bucket(self.bucket_name) 31 | bucket.objects.all().delete() 32 | bucket.delete() 33 | 34 | 35 | def export_asset(asset, bucket): 36 | asset_id = asset.get('Id') 37 | 38 | job = dx.create_job(Type='EXPORT_ASSETS_TO_S3', Details={ 39 | "ExportAssetsToS3": { 40 | "RevisionId": asset.get("RevisionId"), "DataSetId": asset.get("DataSetId"), 41 | "AssetDestinations": [{ 42 | "AssetId": asset_id, 43 | "Bucket": bucket, 44 | "Key": asset_id 45 | }] 46 | } 47 | }) 48 | 49 | job_id = job.get('Id') 50 | dx.start_job(JobId=job_id) 51 | 52 | while True: 53 | job = dx.get_job(JobId=job_id) 54 | 55 | if job.get('State') == 'COMPLETED': 56 | break 57 | elif job.get('State') == 'ERROR': 58 | raise Exception("Job {} failed to complete - {}".format( 59 | job_id, job.get('Errors')[0].get('Message')) 60 | ) 61 | 62 | time.sleep(1) 63 | 64 | 65 | def urlify(s): 66 | s = re.sub(r"[^\w\s]", '', s) 67 | s = re.sub(r"\s+", '-', s) 68 | 69 | return s 70 | 71 | 72 | def parse_asset_arn(arn): 73 | groups = re.match('.*data-sets/(.*)/revisions/(.*)/assets/(.*)$', arn) 74 | 75 | return { 76 | "DataSetId": groups[1], 77 | "RevisionId": groups[2], 78 | "Id": groups[3] 79 | } 80 | 81 | 82 | def dx_csv_to_data_frame(asset): 83 | with TemporaryS3Bucket() as bucket: 84 | export_asset(asset, bucket) 85 | with tempfile.TemporaryDirectory() as temp_dir: 86 | asset_id = asset.get('Id') 87 | dest = 
os.path.join(temp_dir, asset_id) 88 | s3.download_file(bucket, asset_id, dest) 89 | 90 | return pandas.read_csv(dest) 91 | 92 | 93 | @click.command() 94 | @click.argument('arn') 95 | def cli(arn): 96 | asset = parse_asset_arn(arn) 97 | df = dx_csv_to_data_frame(asset) 98 | print(df.describe()) 99 | 100 | 101 | if __name__ == '__main__': 102 | cli() 103 | -------------------------------------------------------------------------------- /subscribers/python/pandas-describe-csv/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.10.41 2 | botocore==1.13.41 3 | Click==7.0 4 | docutils==0.15.2 5 | jmespath==0.9.4 6 | numpy==1.22.0 7 | pandas==0.25.3 8 | python-dateutil==2.8.0 9 | pytz==2019.3 10 | s3transfer==0.2.1 11 | six==1.13.0 12 | urllib3==1.26.19 13 | -------------------------------------------------------------------------------- /subscribers/python/setup-redshift-data-shares/README.md: -------------------------------------------------------------------------------- 1 | # Setup Redshift Data Shares (Python) 2 | 3 | This example will create databases in an Amazon Redshift cluster for data shares in a given AWS Data Exchange data set and revision. The script will create a single database for each asset in the revision, using the asset's `Name` as the database name. 4 | 5 | *Note*: Database names are unique within the cluster, so there is potential for collisions if there are existing databases with the same name as one of the assets. Additionally, data shares can only be imported to a Redshift cluster once. So, this script can only be run successfully once without changing input. 6 | 7 | For more documentation: 8 | * https://docs.aws.amazon.com/data-exchange/latest/userguide/what-is.html 9 | * https://aws.amazon.com/redshift/features/aws-data-exchange-for-amazon-redshift/ 10 | 11 | ### Setup 12 | 13 | Install the requirements, preferably in a virtual environment. 14 | 15 | ```bash 16 | $ pip install -r requirements.txt 17 | ``` 18 | 19 | Set AWS access key and secret. 20 | 21 | ``` 22 | $ export AWS_ACCESS_KEY_ID= 23 | $ export AWS_SECRET_ACCESS_KEY= 24 | ``` 25 | 26 | The following IAM policy is required for this AWS user. 
27 | 28 | ``` 29 | { 30 | "Version": "2012-10-17", 31 | "Statement": [ 32 | { 33 | "Action": [ 34 | "redshift:GetClusterCredentials", 35 | "redshift-data:DescribeStatement", 36 | "redshift-data:ExecuteStatement" 37 | ], 38 | "Effect": "Allow", 39 | "Resource": "*" 40 | }, 41 | { 42 | "Action": [ 43 | "dataexchange:GetDataSet", 44 | "dataexchange:ListRevisionAssets" 45 | ], 46 | "Effect": "Allow", 47 | "Resource": "*" 48 | } 49 | ] 50 | } 51 | ``` 52 | 53 | ### Execution 54 | 55 | ```bash 56 | $ python3 setup-redshift-data-shares.py \ 57 | --data-set-id example-data-set-id-1234567890129 \ 58 | --revision-id example-revision-id-2364567812345 \ 59 | --redshift-cluster-id redshift-cluster-1 \ 60 | --redshift-cluster-database dev \ 61 | --redshift-cluster-database-user awsuser 62 | ``` 63 | -------------------------------------------------------------------------------- /subscribers/python/setup-redshift-data-shares/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.20.23 2 | botocore>=1.20.23 3 | Click>=7.0 4 | -------------------------------------------------------------------------------- /subscribers/python/setup-redshift-data-shares/setup-redshift-data-shares.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import boto3 3 | import re 4 | import click 5 | import time 6 | 7 | ASSET_TYPE = 'REDSHIFT_DATA_SHARE' 8 | 9 | # Uses the Asset's Name and Asset's Data Share Arn to get the required parameters for the Redshift 10 | # create database from datashare query. 11 | # Docs: https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_DATABASE.html 12 | def get_create_database_sql_from_asset(asset): 13 | # Using the Asset's Name as the name of the to-be-created database in the consumer cluster. 14 | asset_name = asset['Name'] 15 | data_share_arn = asset['AssetDetails']['RedshiftDataShareAsset']['Arn'] 16 | matches = re.search(r'arn:aws:redshift:.+:(\d+):datashare:(.+)/(.+)', data_share_arn) 17 | 18 | # Account which owns the Data Share. 19 | source_account = matches.group(1) 20 | 21 | # Cluster namespace from which the data is shared. 22 | source_namespace = matches.group(2) 23 | 24 | # Name of the source Data Share. 25 | data_share_name = matches.group(3) 26 | 27 | return f'CREATE DATABASE {asset_name} FROM DATASHARE {data_share_name} OF ACCOUNT \'{source_account}\' NAMESPACE \'{source_namespace}\'' 28 | 29 | 30 | def wait_for_statement_to_complete(redshift_data, statement_id): 31 | while True: 32 | statement = redshift_data.describe_statement(Id=statement_id) 33 | status = statement['Status'] 34 | 35 | if status in ['ABORTED', 'FAILED']: 36 | error = statement['Error'] 37 | click.echo(f'Error encountered while executing statement: {error}') 38 | raise Exception(f'Redshift query failed with status {status}!') 39 | if status in ['FINISHED']: 40 | return 41 | 42 | # Else, statement is still running.
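        # Any other status (for example SUBMITTED, PICKED, or STARTED) means the statement has not
        # reached a terminal state yet, so pause briefly and call describe_statement again.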
43 | time.sleep(2) 44 | 45 | def create_databases_from_assets(redshift_data, assets, redshift_cluster_id, redshift_cluster_database, redshift_cluster_database_user): 46 | for asset in assets: 47 | sql = get_create_database_sql_from_asset(asset) 48 | statement = redshift_data.execute_statement( 49 | ClusterIdentifier=redshift_cluster_id, 50 | Database=redshift_cluster_database, 51 | DbUser=redshift_cluster_database_user, 52 | Sql=sql 53 | ) 54 | 55 | id = statement['Id'] 56 | 57 | wait_for_statement_to_complete(redshift_data, id) 58 | click.echo(f'SQL statement executed successfully: "{sql}"') 59 | 60 | 61 | def get_assets_from_data_set_and_revision(dataexchange, data_set_id, revision_id): 62 | data_set = dataexchange.get_data_set(DataSetId=data_set_id) 63 | data_set_asset_type = data_set['AssetType'] 64 | 65 | if data_set_asset_type != ASSET_TYPE: 66 | raise Exception(f'AssetType must be of type {ASSET_TYPE} but was {data_set_asset_type}!') 67 | 68 | return dataexchange.list_revision_assets(DataSetId=data_set_id, RevisionId=revision_id)['Assets'] 69 | 70 | 71 | @click.command() 72 | @click.option('--data-set-id', required=True, help='AWS Data Exchange Data set which contains Redshift data shares to set up. Data set must have AssetType REDSHIFT_DATA_SHARE.') 73 | @click.option('--revision-id', required=True, help='AWS Data Exchange Revision which contains Redshift data shares to set up.') 74 | @click.option('--redshift-cluster-id', required=True, help='Amazon Redshift cluster from which the Redshift data shares will be queried.') 75 | @click.option('--redshift-cluster-database', required=True, help='Amazon Redshift database from which the Redshift data shares will be queried.') 76 | @click.option('--redshift-cluster-database-user', required=True, help='Amazon Redshift database user which can connect to the Redshift cluster and database.') 77 | @click.option('--region', default='us-east-1', help='AWS Region of the Data set.') 78 | def main(data_set_id, revision_id, redshift_cluster_id, redshift_cluster_database, redshift_cluster_database_user, region): 79 | dataexchange = boto3.client('dataexchange', region_name=region) 80 | redshift_data = boto3.client('redshift-data', region_name=region) 81 | 82 | assets = get_assets_from_data_set_and_revision(dataexchange, data_set_id, revision_id) 83 | create_databases_from_assets(redshift_data, assets, redshift_cluster_id, redshift_cluster_database, redshift_cluster_database_user) 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /subscribers/python/tf-auto-export-to-s3/README.md: -------------------------------------------------------------------------------- 1 | # [Deprecated] Auto Export To S3 (Python) 2 | 3 | > This sample is deprecated in favor of using the official AWS Data Exchange feature for auto exporting S3 revisions: https://aws.amazon.com/about-aws/whats-new/2021/09/aws-data-exchange-export-third-party-data-updates/ 4 | 5 | This sample shows how to set up an AWS Lambda function which will automatically export all newly published revisions for a given DataSet to S3. All infrastructure is setup using Terraform. 6 | 7 | Logical Prerequisites: 8 | * DataSet ID for an active subscription to a DataSet on AWS Data Exchange. 9 | * Revision ID for the target first revision to export. 10 | 11 | Technical Prerequisites: 12 | * Terraform is installed. 
13 | * AWS CLI is installed and has a profile configured with access to create IAM, S3, and Lambda resources and issue permissions to CloudWatch, S3, and DataExchange. 14 | 15 | > The deployment will target us-east-1 and use the "default" AWS CLI profile by default. Update the "profile" and "region" configuration in the "provider" 16 | section of the terraform template to modify these defaults. 17 | 18 | **Please note that this sample is provided for demonstration and learning purposes only, and should be reviewed for alignment with organisational policies and best practices before any production use.** 19 | 20 | 21 | ### First Time Terraform Initiation 22 | 23 | ``` 24 | $ terraform init 25 | ``` 26 | 27 | ### Build and Deploy 28 | ``` 29 | $ chmod 700 build.sh 30 | $ ./build.sh 31 | $ terraform plan 32 | $ terraform apply 33 | ``` 34 | 35 | build.sh zips the index.py into a lambda_code.zip archive ready for upload. 36 | 37 | Terraform will create the following resources: 38 | 39 | 1. S3 Bucket to store exported Data Exchange assets (files) 40 | 1. Lambda Function to perform the export (with necessary IAM permissions) 41 | 1. Lambda Layer providing the Boto3 v1.17 libraries 42 | 1. EventBridge (CloudWatch Events) Rule to trigger Lambda 43 | 44 | Each time a new Revision is added to the DataSet you provide on deployment, the Assets will be automatically exported to S3 as a response to the CloudWatch Event sent by AWS Data Exchange. -------------------------------------------------------------------------------- /subscribers/python/tf-auto-export-to-s3/adx-example.tf: -------------------------------------------------------------------------------- 1 | # 2 | # AWS Data Exchange automated revision export to S3 upon published Cloudwatch event 3 | # 4 | 5 | terraform { 6 | required_providers { 7 | aws = { 8 | source = "hashicorp/aws" 9 | version = "~> 3.25.0" 10 | } 11 | } 12 | } 13 | 14 | # Configure AWS Provider account & target region 15 | provider "aws" { 16 | profile = "default" 17 | region = "us-east-1" 18 | } 19 | 20 | # Require dataset ID and initial revision ID to be input before the deployment can take place (the dataset must be subscribed to manually in the AWS Console) 21 | variable "datasetID" { 22 | type = string 23 | description = "REQUIRED: the ID for the DataSet" 24 | } 25 | 26 | variable "revisionID" { 27 | type = string 28 | description = "REQUIRED: the ID for an initial Revision to download immediately." 
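# Both required variables can be supplied at deploy time; for example (placeholder IDs shown):
#   terraform apply -var="datasetID=<your-data-set-id>" -var="revisionID=<your-initial-revision-id>"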
29 | } 30 | 31 | # Create S3 bucket to store exported data in 32 | resource "aws_s3_bucket" "DataS3Bucket" { 33 | bucket_prefix = "datas3bucket" 34 | } 35 | 36 | # Apply all Public Access Block controls by default 37 | resource "aws_s3_bucket_public_access_block" "DataS3BucketPublicAccessBlock" { 38 | bucket = aws_s3_bucket.DataS3Bucket.id 39 | block_public_acls = true 40 | block_public_policy = true 41 | ignore_public_acls = true 42 | restrict_public_buckets = true 43 | } 44 | 45 | # Create Lambda function using Python code included in lambda_code.zip 46 | resource "aws_lambda_function" "FunctionGetNewRevision" { 47 | function_name = "FunctionGetNewRevision" 48 | filename = "lambda_code.zip" 49 | source_code_hash = filebase64sha256("lambda_code.zip") 50 | handler = "index.handler" 51 | environment { 52 | variables = { 53 | S3_BUCKET = aws_s3_bucket.DataS3Bucket.bucket 54 | } 55 | } 56 | role = aws_iam_role.RoleGetNewRevision.arn 57 | runtime = "python3.7" 58 | timeout = 180 59 | layers = [aws_lambda_layer_version.Boto3LibLayer.arn] 60 | } 61 | 62 | # Create Lambda Layer to provide Boto3 libraries required for certain ADX functionality 63 | resource "aws_lambda_layer_version" "Boto3LibLayer" { 64 | filename = "Boto3LibLayer.zip" 65 | layer_name = "Boto3LibLayer" 66 | description = "Provides Boto3 (v1.17) as a Lambda Layer to support the latest AWS SDK capabilities" 67 | source_code_hash = filebase64sha256("Boto3LibLayer.zip") 68 | compatible_runtimes = [ "python3.7" ] 69 | } 70 | 71 | # Create new EventBridge rule to trigger on the Revision Published To DataSet event 72 | resource "aws_cloudwatch_event_rule" "NewRevisionEventRule" { 73 | name = "NewRevisionEventRule" 74 | description = "New Revision Event" 75 | event_pattern = jsonencode({ 76 | source = ["aws.dataexchange"], 77 | detail-type = ["Revision Published To Data Set"], 78 | resources = [var.datasetID] 79 | }) 80 | } 81 | 82 | # Create trigger for EventBRidge rule to Lambda function 83 | resource "aws_cloudwatch_event_target" "TargetGetNewRevision" { 84 | rule = aws_cloudwatch_event_rule.NewRevisionEventRule.name 85 | target_id = "TargetGetNewRevision" 86 | arn = aws_lambda_function.FunctionGetNewRevision.arn 87 | } 88 | 89 | # Create Lambda Execution Role 90 | resource "aws_iam_role" "RoleGetNewRevision" { 91 | name = "RoleGetNewRevision" 92 | assume_role_policy = jsonencode({ 93 | Version = "2012-10-17", 94 | Statement = [ 95 | { 96 | Effect = "Allow", 97 | Principal = { 98 | Service = "lambda.amazonaws.com" 99 | }, 100 | Action = "sts:AssumeRole" 101 | } 102 | ] 103 | }) 104 | } 105 | 106 | # Add Required Policies to Lambda Execution Role 107 | resource "aws_iam_role_policy" "RoleGetNewRevisionPolicy" { 108 | name = "RoleGetNewRevisionPolicy" 109 | role = aws_iam_role.RoleGetNewRevision.id 110 | policy = jsonencode({ 111 | Version = "2012-10-17", 112 | Statement = [ 113 | { 114 | Effect = "Allow" 115 | Action = [ 116 | "dataexchange:StartJob", 117 | "dataexchange:CreateJob", 118 | "dataexchange:GetJob", 119 | "dataexchange:ListRevisionAssets", 120 | "dataexchange:GetAsset", 121 | "dataexchange:GetRevision" 122 | ] 123 | Resource = "*" 124 | }, 125 | { 126 | Effect = "Allow", 127 | Action = "s3:GetObject", 128 | Resource = "arn:aws:s3:::*aws-data-exchange*" 129 | Condition = { 130 | "ForAnyValue:StringEquals" = { 131 | "aws:CalledVia" = [ 132 | "dataexchange.amazonaws.com" 133 | ] 134 | } 135 | } 136 | }, 137 | { 138 | Effect = "Allow", 139 | Action = "s3:PutObject", 140 | Resource = [ 141 | aws_s3_bucket.DataS3Bucket.arn, 142 | 
join("",[aws_s3_bucket.DataS3Bucket.arn,"/*"]) 143 | ] 144 | } 145 | ] 146 | }) 147 | } 148 | 149 | # Attach LambdaBasicExecutionRole AWS Managed Policy to Lambda Execution Role 150 | resource "aws_iam_role_policy_attachment" "RoleGetNewRevisionAttachment" { 151 | role = aws_iam_role.RoleGetNewRevision.name 152 | policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 153 | } 154 | 155 | # Provide permission for EventBridge to invoke Lambda function 156 | resource "aws_lambda_permission" "LambdaInvokePermission" { 157 | action = "lambda:InvokeFunction" 158 | function_name = aws_lambda_function.FunctionGetNewRevision.function_name 159 | principal = "events.amazonaws.com" 160 | source_arn = aws_cloudwatch_event_rule.NewRevisionEventRule.arn 161 | } 162 | 163 | # Invoke Lambda function for initial data export 164 | data "aws_lambda_invocation" "FistRevision" { 165 | function_name = aws_lambda_function.FunctionGetNewRevision.function_name 166 | input = jsonencode( 167 | { 168 | InitialInit = { 169 | data_set_id = var.datasetID, 170 | RevisionIds = var.revisionID 171 | } 172 | } 173 | ) 174 | } -------------------------------------------------------------------------------- /subscribers/python/tf-auto-export-to-s3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | zip lambda_code.zip index.py 3 | zip -r Boto3LibLayer.zip dataexchange -------------------------------------------------------------------------------- /subscribers/python/tf-auto-export-to-s3/dataexchange/2017-07-25/paginators-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "pagination": { 3 | "ListDataSetRevisions": { 4 | "input_token": "NextToken", 5 | "output_token": "NextToken", 6 | "limit_key": "MaxResults", 7 | "result_key": "Revisions" 8 | }, 9 | "ListDataSets": { 10 | "input_token": "NextToken", 11 | "output_token": "NextToken", 12 | "limit_key": "MaxResults", 13 | "result_key": "DataSets" 14 | }, 15 | "ListJobs": { 16 | "input_token": "NextToken", 17 | "output_token": "NextToken", 18 | "limit_key": "MaxResults", 19 | "result_key": "Jobs" 20 | }, 21 | "ListRevisionAssets": { 22 | "input_token": "NextToken", 23 | "output_token": "NextToken", 24 | "limit_key": "MaxResults", 25 | "result_key": "Assets" 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /subscribers/python/tf-auto-export-to-s3/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['AWS_DATA_PATH'] = '/opt/' 3 | 4 | from itertools import islice 5 | import boto3 6 | from datetime import datetime 7 | import time 8 | import json 9 | 10 | region = os.environ['AWS_REGION'] 11 | destination_bucket = os.environ['S3_BUCKET'] 12 | 13 | if not destination_bucket: 14 | raise Exception("'S3_BUCKET' environment variable must be defined!") 15 | 16 | # Grouper recipe from standard docs: https://docs.python.org/3/library/itertools.html 17 | def grouper(iterable, n): 18 | iterator = iter(iterable) 19 | group = tuple(islice(iterator, n)) 20 | while group: 21 | yield group 22 | group = tuple(islice(iterator, n)) 23 | 24 | def handler(event, context): 25 | dataexchange = boto3.client( 26 | service_name='dataexchange', 27 | region_name=region 28 | ) 29 | s3 = boto3.client( 30 | service_name='s3', 31 | region_name=region 32 | ) 33 | #If the request is from Terraform get the RevisionID, for first revision 34 | if 'InitialInit' in event: 
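        # This handler is invoked in two ways. The initial invocation from Terraform (the
        # aws_lambda_invocation data source in adx-example.tf) passes a payload shaped like
        #   {"InitialInit": {"data_set_id": "<data-set-id>", "RevisionIds": "<revision-id>"}}
        # Later invocations come from the EventBridge "Revision Published To Data Set" rule, which
        # carries the data set ID in event['resources'][0] and the newly published revision IDs in
        # event['detail']['RevisionIds'].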
35 | data_set_id = event['InitialInit']['data_set_id'] 36 | revision_ids = [event['InitialInit']['RevisionIds']] 37 | print ("Initial revision retrieval") 38 | print (event) 39 | else: 40 | data_set_id = event['resources'][0] 41 | revision_ids = event['detail']['RevisionIds'] 42 | print ("Triggered revision retrieval") 43 | print (event) 44 | # Used to store the Ids of the Jobs exporting the assets to S3. 45 | job_ids = set() 46 | 47 | # Create an ExportRevisionToS3 Job for each Revision ID 48 | for revision_id in revision_ids: 49 | 50 | export_job = dataexchange.create_job( 51 | Type='EXPORT_REVISIONS_TO_S3', 52 | Details={ 53 | 'ExportRevisionsToS3': { 54 | 'DataSetId': data_set_id, 55 | 'RevisionDestinations': [ 56 | { 57 | 'Bucket': destination_bucket, 58 | 'KeyPattern': data_set_id+'/${Revision.Id}/${Asset.Name}', 59 | 'RevisionId': revision_id 60 | } 61 | ] 62 | } 63 | } 64 | ) 65 | # Start the Job and save the JobId. 66 | dataexchange.start_job(JobId=export_job['Id']) 67 | job_ids.add(export_job['Id']) 68 | 69 | # Iterate until all remaining workflow have reached a terminal state, or an error is found. 70 | completed_jobs = set() 71 | while job_ids != completed_jobs: 72 | for job_id in job_ids: 73 | if job_id in completed_jobs: 74 | continue 75 | get_job_response = dataexchange.get_job(JobId=job_id) 76 | if get_job_response['State'] == 'COMPLETED': 77 | print ("Job {} completed".format(job_id)) 78 | completed_jobs.add(job_id) 79 | if get_job_response['State'] == 'ERROR': 80 | job_errors = get_job_response['Errors'] 81 | raise Exception('JobId: {} failed with errors:\n{}'.format(job_id, job_errors)) 82 | # Sleep to ensure we don't get throttled by the GetJob API. 83 | time.sleep(0.2) 84 | return { 85 | 'statusCode': 200, 86 | 'body': json.dumps('All jobs completed.') 87 | } -------------------------------------------------------------------------------- /subscribers/ruby/adx-for-apis-simple/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | gem 'oga' 7 | -------------------------------------------------------------------------------- /subscribers/ruby/adx-for-apis-simple/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | ansi (1.5.0) 5 | ast (2.4.2) 6 | aws-eventstream (1.2.0) 7 | aws-partitions (1.626.0) 8 | aws-sdk-core (3.141.0) 9 | aws-eventstream (~> 1, >= 1.0.2) 10 | aws-partitions (~> 1, >= 1.525.0) 11 | aws-sigv4 (~> 1.1) 12 | jmespath (~> 1, >= 1.6.1) 13 | aws-sdk-dataexchange (1.26.0) 14 | aws-sdk-core (~> 3, >= 3.127.0) 15 | aws-sigv4 (~> 1.1) 16 | aws-sigv4 (1.5.1) 17 | aws-eventstream (~> 1, >= 1.0.2) 18 | jmespath (1.6.1) 19 | oga (3.4) 20 | ast 21 | ruby-ll (~> 2.1) 22 | ruby-ll (2.1.2) 23 | ansi 24 | ast 25 | 26 | PLATFORMS 27 | x86_64-linux 28 | 29 | DEPENDENCIES 30 | aws-sdk-dataexchange 31 | oga 32 | 33 | BUNDLED WITH 34 | 2.3.7 35 | -------------------------------------------------------------------------------- /subscribers/ruby/adx-for-apis-simple/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This sample code will show you how to integrate with the [AWS Data Exchange for APIs (Test Product)][APITestProduct]. 
This simple test product echoes the request sent to it, so you can try out different HTTP Methods (GET, POST, etc.), Query String Parameters, Headers, and Body content as you explore API based data consumption. By the end of this exercise you'll know how to use the [AWS Data Exchange Client Software Development Kit (SDK)][Tools] with [Ruby][AWSDataExchangeSDKForRuby] to make a `SendApiAsset` request to an API based AWS Data Exchange product. 4 | 5 | This will typically consist of three main stages: 6 | 1. Import relevant SDK Clients and Interfaces, establish base Client configuration, and instantiate the Client. \ 7 | _(This stage will remain consistent across all potential AWS Data Exchange for APIs use cases)_ 8 | 2. Define the relevant Provider / Product specific identities) \ 9 | _(This stage will remain consistent across all uses of a given Product)_ 10 | 3. Define the request-specific parameters based on your business need. \ 11 | _(This stage will likely change for every request)_ 12 | 13 | ## Getting Started 14 | Install ruby version 3.1.X from ["Download Ruby"][RubyGettingStarted]. 15 | 16 | To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the ["AWS Data Exchange for APIs (Test product)"][APITestProduct] Product, and subscribe. 17 | Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the Product Info variables (asset_id, revision_id, data_set_id) in the code sample (send_api_asset.rb) (you will likely find they match the sample contents). Next, update the `send_api_asset` parameters based on your desired request parameters. Again, for test purposes, the provided inputs should work just fine. Finally, execute the sample program using: ruby send_api_asset.rb . 18 | 19 | To assist with finding the necessary inputs for the `sendApiAssetInput` variable, the Data Exchange console provides Sample CLI requests as shown below. The first 3 parameters map to the Product Info variables (data_set_id, revision_id and asset_id), and the rest map to sendApiAssetInput 20 | ``` 21 | aws dataexchange send-api-asset \ 22 | --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 23 | --revision-id 32559097c7d209b02af6de5cad4385fe \ 24 | --asset-id 4e94198cfdb8400793fb3f0411861960 \ 25 | --method POST \ 26 | --path "/" \ 27 | --query-string-parameters 'param1=value1,param2=value2' \ 28 | --request-headers 'header=header_value' \ 29 | --body "{\"body_param\":\"body_param_value\"}" 30 | ``` 31 | 32 | ## Authentication 33 | By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. For local development purposes, this will typically use credentials provided to the AWS CLI by [`aws configure`][AWSConfigure]. When running on Amazon EC2 it will typically use the [EC2 Instance Profile][IAMRolesForEC2], and for AWS Lambda it will use the [Lambda Execution Role][LambdaExecutionRole]. 
34 | 35 | [RubyGettingStarted]: https://www.ruby-lang.org/en/downloads/ 36 | [APITestProduct]: https://us-east-1.console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-pgkxrurxwmp76 37 | [Tools]: https://aws.amazon.com/tools/ 38 | [AWSDataExchangeSDKForRuby]: https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/DataExchange.html# 39 | [IAMRolesForEC2]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html 40 | [LambdaExecutionRole]: https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html 41 | [AWSConfigure]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html 42 | -------------------------------------------------------------------------------- /subscribers/ruby/adx-for-apis-simple/send_api_asset.rb: -------------------------------------------------------------------------------- 1 | #Please see the README.md on the GitHub AWS Data Exchange Samples repository for a more detailed overview with links to relevant AWS documentation. 2 | # 3 | #This code is provided as a sample of how to use the AWS Data Exchange Client Software Development Kit (SDK) to connect to Data Exchange For APIs 4 | #based Data Sets. This will typically consist of three main stages: 5 | #1. Import relevant SDK Clients, establish base Client configuration, and instantiate the Client. 6 | # (This stage will remain consistent across all potential AWS Data Exchange for APIs use cases) 7 | #2. Define the relevant Provider / Product specific identities) 8 | # (This stage will remain consistent across all uses of a given Product. 9 | #3. Define the request-specific parameters based on your business need. 10 | # (This stage will likely change for every request) 11 | # 12 | #To get started, sign in to the AWS Management Console, browse to AWS Data Exchange, search for the "AWS Data Exchange for APIs (Test product)" 13 | #Product, and subscribe. 14 | #Copy the relevant DataSetId, RevisionId, and AssetId from the Entitled Data page and paste them into the productInfo constant below 15 | # 16 | #Familiarity with ruby programming language is assumed. For ruby programming language documentation visit: https://www.ruby-lang.org/en/documentation/quickstart/ 17 | # 18 | #To assist with finding the necessary inputs for the asset_id, revision_id and data_set_id, the Data Exchange console provides 19 | #Sample CLI requests as shown below. The first 3 parameters map to the product specific identities, and the rest are additional send_api_asset call parameters 20 | #aws dataexchange send-api-asset \ 21 | # --data-set-id 8d494cba5e4720e5f6072e280daf70a8 \ 22 | # --revision-id 32559097c7d209b02af6de5cad4385fe \ 23 | # --asset-id 4e94198cfdb8400793fb3f0411861960 \ 24 | # --method POST \ 25 | # --path "/" \ 26 | # --query-string-parameters 'param1=value1,param2=value2' \ 27 | # --request-headers 'header=header_value' \ 28 | # --body "{\"body_param\":\"body_param_value\"}" 29 | # 30 | #By default, this code will authenticate against AWS Data Exchange using the configuration of the environment in which it runs. 31 | #For local development purposes, this will typically use credentials provided to the AWS CLI by `aws configure` 32 | #When running on Amazon EC2 it will typically use the EC2 Instance Profile, and for AWS Lambda it will use the Lambda Execution Role. 
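#Regardless of where the credentials come from, the calling identity is assumed to also need IAM
#permission for the dataexchange:SendApiAsset action; verify this against your account's policies.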
33 | # 34 | #To execute this code: 35 | # 36 | #bundle install 37 | #ruby send_api_asset.rb 38 | 39 | require 'aws-sdk-dataexchange' 40 | require 'json' 41 | 42 | #obtain credentials from 'default' profile in shared credentials file 43 | credentials = Aws::SharedCredentials.new 44 | 45 | client = Aws::DataExchange::Client::new(region: 'us-east-1', credentials: credentials) 46 | 47 | # Product specific identities 48 | asset_id = "4e94198cfdb8400793fb3f0411861960" 49 | revision_id = "32559097c7d209b02af6de5cad4385fe" 50 | data_set_id = "8d494cba5e4720e5f6072e280daf70a8" 51 | 52 | # adjust the send_api_asset call parameters based on your needs. 53 | resp = client.send_api_asset( 54 | body: {"body_param" => "body_param_value"}.to_json, 55 | query_string_parameters: {"param1" => "value1", "param2" => "value2"}, 56 | asset_id: asset_id, 57 | revision_id: revision_id, 58 | data_set_id: data_set_id, 59 | method: "POST", 60 | path: "/" 61 | ) 62 | 63 | puts "Response Headers:" 64 | puts resp.response_headers 65 | puts 66 | puts "Response Body:" 67 | puts resp.body.string 68 | -------------------------------------------------------------------------------- /subscribers/ruby/all-entitled-datasets/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | -------------------------------------------------------------------------------- /subscribers/ruby/all-entitled-datasets/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.0.3) 5 | aws-partitions (1.239.0) 6 | aws-sdk-core (3.77.0) 7 | aws-eventstream (~> 1.0, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.0.0) 12 | aws-sdk-core (~> 3, >= 3.71.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sigv4 (1.1.0) 15 | aws-eventstream (~> 1.0, >= 1.0.2) 16 | jmespath (1.6.1) 17 | 18 | PLATFORMS 19 | ruby 20 | 21 | DEPENDENCIES 22 | aws-sdk-dataexchange 23 | 24 | BUNDLED WITH 25 | 1.17.3 26 | -------------------------------------------------------------------------------- /subscribers/ruby/all-entitled-datasets/README.md: -------------------------------------------------------------------------------- 1 | # All Entitled Data Sets (Ruby) 2 | 3 | This sample retrieves a list of all subscriber's entitled data sets. 4 | 5 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` and `AWS_REGION`. 6 | 7 | ``` 8 | $ bundle exec ruby all-entitled-datasets.rb 9 | 10 | prod-zg4u6tpyxud5i/7ae12084f47ea658ab62ee90edd513dd: NYC Property Sales 2014 11 | Over 80,000 property sales in New York City in 2014 12 | prod-zg4u6tpyxud5i/fc19d00c8780199e4fccd21f4834c905: NYC Property Sales 2018 13 | A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 14 | prod-zg4u6tpyxud5i/05964b659bbcb607d43c0d5845838e7f: NYC Property Sales 2015 15 | Over 80,000 property sales in New York City in 2015 16 | prod-zg4u6tpyxud5i/7d8f73e3c5acdde79fd2874dd98afdcd: NYC Property Sales 2016 17 | Over 80,000 property sales in New York City in 2016 18 | prod-zg4u6tpyxud5i/50782dc315b94e46fdbd4a12cec6820e: NYC Property Sales 2017 19 | Records of over 80,000 property sales transactions. 
20 | ``` 21 | -------------------------------------------------------------------------------- /subscribers/ruby/all-entitled-datasets/all-entitled-datasets.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-dataexchange' 4 | 5 | Aws.config.update( 6 | region: ENV['AWS_REGION'] || 'us-east-1', 7 | credentials: Aws::Credentials.new( 8 | ENV['AWS_ACCESS_KEY_ID'], 9 | ENV['AWS_SECRET_ACCESS_KEY'], 10 | ENV['AWS_SESSION_TOKEN'] 11 | ) 12 | ) 13 | 14 | dx = Aws::DataExchange::Client.new 15 | 16 | dx.list_data_sets(origin: 'ENTITLED').each do |response| 17 | response.data_sets.each do |data_set| 18 | puts "#{data_set.origin_details.product_id}/#{data_set.id}: #{data_set.name}\n #{data_set.description}" 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /subscribers/ruby/most-expensive-neighborhoods-in-nyc/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | gem 'aws-sdk-dataexchange' 6 | gem 'aws-sdk-s3' 7 | gem 'i18n' 8 | gem 'monetize' 9 | gem 'money' 10 | gem 'simple_statistics' 11 | gem 'smarter_csv' 12 | -------------------------------------------------------------------------------- /subscribers/ruby/most-expensive-neighborhoods-in-nyc/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | aws-eventstream (1.0.3) 5 | aws-partitions (1.239.0) 6 | aws-sdk-core (3.77.0) 7 | aws-eventstream (~> 1.0, >= 1.0.2) 8 | aws-partitions (~> 1, >= 1.239.0) 9 | aws-sigv4 (~> 1.1) 10 | jmespath (~> 1.0) 11 | aws-sdk-dataexchange (1.0.0) 12 | aws-sdk-core (~> 3, >= 3.71.0) 13 | aws-sigv4 (~> 1.1) 14 | aws-sdk-kms (1.25.0) 15 | aws-sdk-core (~> 3, >= 3.71.0) 16 | aws-sigv4 (~> 1.1) 17 | aws-sdk-s3 (1.54.0) 18 | aws-sdk-core (~> 3, >= 3.77.0) 19 | aws-sdk-kms (~> 1) 20 | aws-sigv4 (~> 1.1) 21 | aws-sigv4 (1.1.0) 22 | aws-eventstream (~> 1.0, >= 1.0.2) 23 | concurrent-ruby (1.1.5) 24 | i18n (1.7.0) 25 | concurrent-ruby (~> 1.0) 26 | jmespath (1.6.1) 27 | monetize (1.9.2) 28 | money (~> 6.12) 29 | money (6.13.4) 30 | i18n (>= 0.6.4, <= 2) 31 | simple_statistics (0.12) 32 | smarter_csv (1.2.6) 33 | 34 | PLATFORMS 35 | ruby 36 | 37 | DEPENDENCIES 38 | aws-sdk-dataexchange 39 | aws-sdk-s3 40 | i18n 41 | monetize 42 | money 43 | simple_statistics 44 | smarter_csv 45 | 46 | BUNDLED WITH 47 | 1.17.3 48 | -------------------------------------------------------------------------------- /subscribers/ruby/most-expensive-neighborhoods-in-nyc/README.md: -------------------------------------------------------------------------------- 1 | # Most Expensive Neighborhoods in NYC 2 | 3 | This sample uses [New York City Property Sales (2014-2018)](https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-27ompcouk2o6i) provided by [Enigma](https://aws.amazon.com/marketplace/seller-profile?id=46c64acb-20c1-41fe-a495-a364f64d0083) with a free subscription. 4 | 5 | The code retrieves data sets between 2014 and 2018, enumerates data set revisions, exports latest CSV assets to S3, downloads the files to a local temporary location, imports the CSVs, calculates the median sale price in each neighborhood for all sales over $100,000, and displays the 10 most expensive neighborhoods. 
6 | 7 | To run the sample, set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` and `AWS_REGION`, subscribe to the product on AWS Data Exchange and replace the data set IDs and the S3 bucket name in the code. 8 | 9 | ``` 10 | $ bundle exec ruby most-expensive-neighborhoods-in-nyc.rb 11 | 12 | NYC Property Sales 2017: Records of over 80,000 property sales transactions. 13 | 96023397ee826914fefcef392b218c7b (Oct-17-2019) created 2019-10-28 16:01:46 UTC 14 | e591f6f30d29c5d566c34a7436be701a 2017_NYC_Property_Sales__10172019 .csv 15 | Exporting 2017_NYC_Property_Sales__10172019 .csv to S3 ......... done. 16 | Loading 2017_NYC_Property_Sales__10172019 .csv ........... done. 17 | 18 | NYC Property Sales 2018: A table of 80,000+ New York City property sales occurring in 2018, organized by borough, including sale price and sale date. 19 | b0457c8b3c201115daa0f6ca8f2c4140 (2018 Property Sales from 10172019) created 2019-10-28 16:01:47 UTC 20 | 01535eb11937b7f6ee825c512cb58582 2018_NYC_Property_Sales__10172019.csv 21 | Exporting 2018_NYC_Property_Sales__10172019.csv to S3 ......... done. 22 | Loading 2018_NYC_Property_Sales__10172019.csv ........... done. 23 | 24 | 10 Most Expensive NYC Neighborhoods: 25 | 26 | EAST RIVER: $11,200,000 27 | CIVIC CENTER: $4,737,500 28 | LITTLE ITALY: $3,709,377 29 | SOHO: $2,983,750 30 | TRIBECA: $2,797,500 31 | FLATIRON: $2,450,000 32 | FASHION: $2,394,614 33 | NAVY YARD: $2,050,000 34 | ROSSVILLE-PORT MOBIL: $1,950,000 35 | RED HOOK: $1,910,000 36 | ``` 37 | -------------------------------------------------------------------------------- /subscribers/ruby/most-expensive-neighborhoods-in-nyc/most-expensive-neighborhoods-in-nyc.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'aws-sdk-dataexchange' 4 | require 'aws-sdk-s3' 5 | require 'smarter_csv' 6 | require 'simple_statistics' 7 | require 'money' 8 | require 'i18n' 9 | require 'monetize' 10 | 11 | I18n.enforce_available_locales = false 12 | Money.locale_backend = :i18n 13 | 14 | Aws.config.update( 15 | region: ENV['AWS_REGION'] || 'us-east-1', 16 | credentials: Aws::Credentials.new( 17 | ENV['AWS_ACCESS_KEY_ID'], 18 | ENV['AWS_SECRET_ACCESS_KEY'], 19 | ENV['AWS_SESSION_TOKEN'] 20 | ) 21 | ) 22 | 23 | # data sets provided by Enigma 24 | # https://console.aws.amazon.com/dataexchange/home?region=us-east-1#/products/prodview-27ompcouk2o6i 25 | 26 | data_sets = { 27 | 2014 => '7ae12084f47ea658ab62ee90edd513dd', 28 | 2015 => '05964b659bbcb607d43c0d5845838e7f', 29 | 2016 => '7d8f73e3c5acdde79fd2874dd98afdcd', 30 | 2017 => '50782dc315b94e46fdbd4a12cec6820e', 31 | 2018 => 'fc19d00c8780199e4fccd21f4834c905' 32 | } 33 | 34 | s3_bucket_name = ENV['S3_BUCKET_NAME'] || raise('missing ENV["S3_BUCKET_NAME"]') 35 | 36 | dx = Aws::DataExchange::Client.new 37 | 38 | neighborhood_sale_prices = {} 39 | 40 | data_sets.each_pair do |_year, data_set_id| 41 | data_set = dx.get_data_set( 42 | data_set_id: data_set_id 43 | ) 44 | 45 | puts "#{data_set.name}: #{data_set.description}" 46 | 47 | latest_asset = nil 48 | 49 | # fetch revisions for this data set 50 | 51 | revisions = dx.list_data_set_revisions( 52 | data_set_id: data_set.id 53 | ).map(&:revisions).flatten 54 | 55 | revisions.each do |revision| 56 | puts "#{revision.id} (#{revision.comment}) created #{revision.created_at}" 57 | 58 | # fetch assets for this revision 59 | 60 | assets = dx.list_revision_assets( 61 | data_set_id: data_set.id, 62 | revision_id: revision.id 63 | 
).map(&:assets).flatten 64 | 65 | # the first result is the latest asset 66 | 67 | assets.each do |asset| 68 | puts "#{asset.id} #{asset.name}" 69 | latest_asset ||= asset 70 | end 71 | end 72 | 73 | return unless latest_asset 74 | 75 | # export data to S3 76 | 77 | STDOUT.write "Exporting #{latest_asset.name} to S3 ..." 78 | 79 | export_job = dx.create_job( 80 | type: 'EXPORT_ASSETS_TO_S3', 81 | details: { 82 | export_assets_to_s3: { 83 | asset_destinations: [ 84 | asset_id: latest_asset.id, 85 | bucket: s3_bucket_name, 86 | key: "data/#{latest_asset.name}" 87 | ], 88 | data_set_id: latest_asset.data_set_id, 89 | revision_id: latest_asset.revision_id 90 | } 91 | } 92 | ) 93 | 94 | dx.start_job(job_id: export_job.id) 95 | 96 | loop do 97 | sleep 1 98 | job_in_progress = dx.get_job(job_id: export_job.id) 99 | STDOUT.write('.') 100 | state = job_in_progress.state 101 | next if state == 'IN_PROGRESS' || state == 'WAITING' 102 | break if state == 'COMPLETED' 103 | if job_in_progress.state == 'ERROR' 104 | raise job_in_progress.errors.join(&:to_s) 105 | end 106 | 107 | raise job_in_progress.state 108 | end 109 | 110 | puts ' done.' 111 | 112 | STDOUT.write "Loading #{latest_asset.name} ..." 113 | 114 | s3 = Aws::S3::Client.new 115 | 116 | # load data from S3 117 | 118 | Tempfile.create do |f| 119 | s3.get_object({ 120 | bucket: s3_bucket_name, 121 | key: "data/#{latest_asset.name}" 122 | }, target: f) 123 | 124 | rows = 0 125 | SmarterCSV.process(f, row_sep: :auto, col_sep: ',', file_encoding: Encoding::UTF_8) do |coll| 126 | coll.each do |row| 127 | rows += 1 128 | STDOUT.write('.') if rows % 10_000 == 0 129 | sale_price = Monetize.parse(row[:sale_price]).to_f 130 | next unless sale_price > 100_000 131 | 132 | neighborhood_sale_prices[row[:neighborhood]] ||= [] 133 | neighborhood_sale_prices[row[:neighborhood]] << sale_price 134 | end 135 | end 136 | 137 | puts ' done.' 138 | end 139 | end 140 | 141 | puts '10 Most Expensive NYC Neighborhoods:' 142 | 143 | neighborhood_median_sale_prices = Hash[neighborhood_sale_prices.map do |neighborhood, prices| 144 | [neighborhood, prices.median] 145 | end] 146 | 147 | neighborhood_median_sale_prices 148 | .sort_by { |_neighborhood, median_price| -median_price } 149 | .take(10) 150 | .each do |neighborhood_median_price_pair| 151 | dollars = Money.new(neighborhood_median_price_pair[1] * 100, 'USD') 152 | .format(thousands_separator: ',', drop_trailing_zeros: true) 153 | puts "#{neighborhood_median_price_pair[0]}: #{dollars}" 154 | end 155 | --------------------------------------------------------------------------------