├── .github └── workflows │ ├── python.yml │ ├── secrets-scan.yml │ ├── security-scan.yml │ └── shellcheck-scan.yml ├── .gitignore ├── .images ├── architecture-data-collection-compute-optimizer.png ├── architecture-data-collection-deploy.png ├── architecture-data-collection-detailed.png ├── architecture-data-collection.png ├── architecture-data-export-replication-to-secondary.png ├── architecture-data-exports-advanced.png ├── architecture-data-exports.png ├── architecture-legacy-cur.png ├── deployment-guide-button.svg └── documentation.svg ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── SECURITY.md ├── case-summarization ├── GUARDRAIL.md ├── README.md ├── deploy │ ├── case-summarization.yaml │ └── guardrail.yaml ├── images │ └── archi.png ├── layer │ ├── build-layer.sh │ ├── publish-lambda-layer.sh │ └── requirements.txt └── utils │ ├── bump-release.py │ ├── release.sh │ ├── upload.sh │ └── version.json ├── data-collection ├── CHANGELOG.md ├── CONTRIBUTING.md ├── README.md ├── deploy │ ├── account-collector.yaml │ ├── data │ │ └── rds_graviton_mapping.csv │ ├── deploy-data-collection.yaml │ ├── deploy-data-read-permissions.yaml │ ├── deploy-in-linked-account.yaml │ ├── deploy-in-management-account.yaml │ ├── module-aws-feeds.yaml │ ├── module-backup.yaml │ ├── module-budgets.yaml │ ├── module-compute-optimizer.yaml │ ├── module-cost-anomaly.yaml │ ├── module-cost-explorer-rightsizing.yaml │ ├── module-ecs-chargeback.yaml │ ├── module-health-events.yaml │ ├── module-inventory.yaml │ ├── module-isv-feeds.yaml │ ├── module-license-manager.yaml │ ├── module-organization.yaml │ ├── module-pricing.yaml │ ├── module-quicksight.yaml │ ├── module-rds-usage.yaml │ ├── module-service-quotas.yaml │ ├── module-support-cases.yaml │ ├── module-transit-gateway.yaml │ ├── module-trusted-advisor.yaml │ ├── module-workspaces-metrics.yaml │ └── source │ │ ├── ecs │ │ └── Athena │ │ │ ├── bu_usage_view.sql │ │ │ ├── cluster_metadata_view.sql │ │ │ ├── ec2_cluster_costs_view.sql │ │ │ └── ecs_chargeback_report.sql │ │ ├── partition_repair_util.py │ │ ├── regions.csv │ │ ├── s3_backwards_comp.py │ │ ├── s3_files_migration.py │ │ └── step-functions │ │ ├── awsfeeds-state-machine-v1.json │ │ ├── crawler-state-machine.json │ │ ├── health-detail-state-machine.json │ │ ├── main-state-machine-v2.json │ │ ├── main-state-machine-v3.json │ │ ├── main-state-machine.json │ │ └── standalone-state-machine.json └── utils │ ├── bump-release.py │ ├── release.sh │ ├── upload.sh │ └── version.json ├── data-exports ├── README.md ├── deploy │ ├── cur-aggregation.yaml │ └── data-exports-aggregation.yaml └── utils │ └── release.sh ├── pytest.ini ├── rls ├── .gitignore ├── README.md ├── deploy │ ├── create_rls.py │ └── deploy_cid_rls.yaml └── utils │ ├── build.sh │ ├── bump-release.py │ ├── qs_s3_manifest.json │ ├── release.sh │ ├── tagger │ ├── aws_org_tagger_lambda.py │ └── data.csv │ ├── tox.ini │ └── version.json ├── security-hub └── deploy │ └── module-securityhub.yaml ├── test ├── README.md ├── cleanup.py ├── conftest.py ├── debugstackets.yml ├── run-test-from-scratch.sh ├── test_from_scratch.py └── utils.py └── utils ├── lint.sh └── pylint.py /.github/workflows/python.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Python Quality Check 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | pylint-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - 
name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | - name: Install packages 21 | run: | 22 | pip install cfn-flip pylint urllib3 boto3 bandit 23 | - name: Pylint all 24 | run: | 25 | python utils/pylint.py 26 | -------------------------------------------------------------------------------- /.github/workflows/secrets-scan.yml: -------------------------------------------------------------------------------- 1 | name: TruffleHog Secrets Scan 2 | on: [pull_request] 3 | jobs: 4 | TruffleHog: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout code 8 | uses: actions/checkout@v3 9 | with: 10 | fetch-depth: 0 11 | - name: TruffleHog OSS 12 | uses: trufflesecurity/trufflehog@main 13 | with: 14 | path: ./ 15 | base: ${{ github.event.repository.default_branch }} 16 | head: HEAD 17 | extra_args: --debug --only-verified -------------------------------------------------------------------------------- /.github/workflows/security-scan.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Security Scan 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | cfn-lint-cfn-nag-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | - name: Set up Ruby 21 | uses: ruby/setup-ruby@v1 22 | with: 23 | ruby-version: '3.1' 24 | - name: Install CFN tools 25 | run: | 26 | gem install cfn-nag 27 | - name: Install cfn-lint 28 | run: | 29 | pip install cfn-lint checkov 30 | pip install --no-cache-dir packaging cyclonedx-python-lib=='5.2.0' #https://github.com/bridgecrewio/checkov/issues/5841 31 | - name: Scan all templates 32 | run: | 33 | utils/lint.sh 34 | -------------------------------------------------------------------------------- /.github/workflows/shellcheck-scan.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Shellcheck Scan 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | shellcheck-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - name: Scan all bash 17 | run: | 18 | find ./ -type f -name "*.sh" -exec shellcheck {} + 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .DS_Store 163 | .vscode 164 | 165 | # Packaged modules 166 | fof.zip 167 | ecs.zip 168 | ta.zip 169 | 170 | # Local sandbox 171 | sandbox/ 172 | scratch/ 173 | migration_log.csv 174 | # Pylint custom script temporal folder 175 | .tmp 176 | 177 | data-collection/lambda-layers/python/ 178 | data-collection/lambda-layers/layer.zip 179 | data-collection/test/clean-html.py 180 | -------------------------------------------------------------------------------- /.images/architecture-data-collection-compute-optimizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-compute-optimizer.png -------------------------------------------------------------------------------- /.images/architecture-data-collection-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-deploy.png -------------------------------------------------------------------------------- /.images/architecture-data-collection-detailed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-detailed.png -------------------------------------------------------------------------------- /.images/architecture-data-collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection.png -------------------------------------------------------------------------------- /.images/architecture-data-export-replication-to-secondary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-export-replication-to-secondary.png -------------------------------------------------------------------------------- /.images/architecture-data-exports-advanced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-exports-advanced.png -------------------------------------------------------------------------------- /.images/architecture-data-exports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-exports.png -------------------------------------------------------------------------------- /.images/architecture-legacy-cur.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-legacy-cur.png -------------------------------------------------------------------------------- /.images/deployment-guide-button.svg: -------------------------------------------------------------------------------- 1 | 2 | Deployment Guide 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Deployment Guide 19 | 20 | 21 | -------------------------------------------------------------------------------- /.images/documentation.svg: -------------------------------------------------------------------------------- 1 | 2 | Documentation 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Documentation 19 | 20 | 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. 
Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Contribution to different modules 48 | Please follow specific guides for contribution to specific elements of the CID framework 49 | * [data-collection](/data-collection/CONTRIBUTING.md) 50 | 51 | ## Code of Conduct 52 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 53 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 54 | opensource-codeofconduct@amazon.com with any additional questions or comments. 55 | 56 | 57 | ## Security issue notifications 58 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 59 | 60 | 61 | ## Licensing 62 | 63 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Cloud Intelligence Dashboards - Data Collection 2 | 3 | ## Table of Contents 4 | 1. [Overview](#Overview) 5 | 1. [Architecture of Data Exports](#Architecture-of-Data-Exports) 6 | 1. [Architecture of Data Collection](#Architecture-of-Data-Collection) 7 | 1. [Cost](#Cost) 8 | 1. [Prerequisites](#Prerequisites) 9 | 1. [Regions](#Regions) 10 | 1. [Deployment and Cleanup Steps](#Deployment-and-Cleanup-Steps) 11 | 1. [Changelogs](#Changelogs) 12 | 1. [Feedback](#Feedback) 13 | 1. [Security](#Security) 14 | 1. [License](#License) 15 | 1. [Notices](#Notices) 16 | 17 | ## Overview 18 | This repository is a part of [Cloud Intelligence Dashboards](https://catalog.workshops.aws/awscid), a project that provides AWS customers with a series of in-depth and customizable dashboards for the most comprehensive cost and usage details to help optimize cost, track usage goals, and achieve operational excellence. 19 | 20 | This repository contains following elements: 21 | * [data-exports](/data-exports) - a Cloud Formation Templates for AWS Data Exports, such as Cost and Usage Report 2.0 and others. This allows a replication of Exports from your Management Account(s) to a Dedicated Data Collection Accounts as well as aggregation of multiple Exports from a set of Linked Accounts. 
22 | * [data-collection](/data-collection) - a set of Cloud Formation Templates for collecting infrastructure operational data from Management and Linked Accounts. Such as data from AWS Trusted Advisor, AWS Compute Optimizer, Inventories, Pricing, AWS Health, AWS Support Cases etc. See more about types of data collected [here](/data-collection). 23 | * [case-summarization](/case-summarization) - an additional Cloud Formation Template for deploying the AWS Support Case Summarization plugin that offers the capability to summarize cases through Generative AI powered by Amazon Bedrock. 24 | * [rls](/rls) - a stack for managing Row Level Security for CID Dashboards. 25 | * [security-hub](/security-hub) - Collection of data from AWS Security Hub. 26 | 27 | All Data Collections can be used independently from Dashboards. Typically data collections store data on [Amazon S3 Bucket](https://aws.amazon.com/s3/) and provide [AWS Glue](https://aws.amazon.com/glue/) tables and [Amazon Athena](https://aws.amazon.com/athena/) Views to explore and use these data. 28 | 29 | ### Other AWS Services 30 | * [Collection of AWS Config data](https://github.com/aws-samples/config-resource-compliance-dashboard) 31 | 32 | ### Multi-cloud data 33 | * [Collection of Azure Cost Data](https://github.com/aws-samples/aws-data-pipelines-for-azure-storage/) 34 | * [Collection of GCP Cost Data](https://github.com/awslabs/cid-gcp-cost-dashboard/) 35 | * [Collection of OCI Cost Data](https://github.com/awslabs/cid-oci-cost-dashboard/) 36 | 37 | ## Architecture of Data Exports 38 | ![Architecture of Data Exports](.images/architecture-data-exports.png "Architecture of Data Exports") 39 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) delivers daily the Cost & Usage Report (CUR2) to an [Amazon S3 Bucket](https://aws.amazon.com/s3/) in the Management Account. 40 | 2. [Amazon S3](https://aws.amazon.com/s3/) replication rule copies Export data to a dedicated Data Collection Account S3 bucket automatically. 41 | 3. [Amazon Athena](https://aws.amazon.com/athena/) allows querying data directly from the S3 bucket using an [AWS Glue](https://aws.amazon.com/glue/) table schema definition. 42 | 4. [Amazon QuickSight](https://aws.amazon.com/quicksight/) datasets can read from [Amazon Athena](https://aws.amazon.com/athena/). Check Cloud Intelligence Dashboards. 43 | 44 | See more in [data-exports](/data-exports). 45 | 46 | 47 | 48 | ## Architecture of Data Collection 49 | ![Architecture of Advanced Data Collection](.images/architecture-data-collection.png "Architecture of Advanced Data Collection") 50 | 1. The Advanced Data Collection can be deployed to enable advanced dashboards based on [AWS Trusted Advisor](https://aws.amazon.com/trustedadvisor/), [AWS Health Events](https://docs.aws.amazon.com/health/latest/ug/getting-started-phd.html) and other sources. Additional data is retrieved from [AWS Organization](https://aws.amazon.com/organizations/) or Linked Accounts. In this case [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rule triggers an [AWS Step Functions](https://aws.amazon.com/step-functions/) for data collection modules on a configurable schedule. 51 | 52 | 2. The "Account Collector" [AWS Lambda](https://aws.amazon.com/lambda/) in AWS Step Functions retrieves linked account details using [AWS Organizations API](https://docs.aws.amazon.com/organizations/latest/APIReference/Welcome.html). 53 | 54 | 3. 
The "Data Collection" Lambda function in AWS Step Functions assumes role in each linked account to retrieve account-specific data via [AWS SDK](https://aws.amazon.com/sdk-for-python/). 55 | 56 | 4. Retrieved data is stored in a centralized [Amazon S3 Bucket](https://aws.amazon.com/s3/). 57 | 58 | 5. Advanced Cloud Intelligence Dashboards leverage [Amazon Athena](https://aws.amazon.com/athena/) and [Amazon QuickSight](https://aws.amazon.com/quicksight/) for comprehensive data analysis. 59 | 60 | See more details in [data-collection](/data-collection). 61 | 62 | 63 | ## Cost 64 | The following table provides a sample cost breakdown for deploying of Foundational Dashboards with the default parameters in the US East (N. Virginia) Region for one month. 65 | 66 | | AWS Service | Dimensions | Monthly Cost [USD] | 67 | |---------------------------------|-------------------------------|--------------------| 68 | | S3 | Monthly storage | $5-10* | 69 | | AWS Lambda | On the schedule 1/14 days | $<3* | 70 | | AWS Step Functions | On the schedule 1/14 days | $<3* | 71 | | AWS Glue Crawler | On schedule | $<3* | 72 | | AWS Athena | Data scanned monthly | $15* | 73 | | **Total Estimated Monthly Cost** | | **<$50** | 74 | 75 | \* Costs are relative to the size of collected data (number of workloads, modules activated, AWS Accounts, Regions etc) and configured data collection frequency. 76 | 77 | Pleas use AWS Pricing Calculator for precise estimation. 78 | 79 | ## Prerequisites 80 | You need access to AWS Accounts. We recommend deployment of the Data Collection in a dedicated Data Collection Account, other than your Management (Payer) Account. You can use it to aggregate data from multiple Management (Payer) Accounts or multiple Linked Accounts. 81 | 82 | If you do not have access to the Management/Payer Account, you can still collect some types fo data across multiple Linked accounts. 83 | 84 | ## Regions 85 | Make sure you are installing data collection in the same region where you are going to use the data to avoid cross region charges. 86 | 87 | | Region Name | Region Code | Available | 88 | |:------------ | :-------------| :-------------| 89 | | Africa (Cape Town) | af-south-1 | | 90 | | Asia Pacific (Tokyo) | ap-northeast-1 | :heavy_check_mark: | 91 | | Asia Pacific (Seoul) | ap-northeast-2 | :heavy_check_mark: | 92 | | Asia Pacific (Mumbai) | ap-south-1 | :heavy_check_mark: | 93 | | Asia Pacific (Singapore) | ap-southeast-1 | :heavy_check_mark: | 94 | | Asia Pacific (Sydney) | ap-southeast-2 | :heavy_check_mark: | 95 | | Asia Pacific (Jakarta) | ap-southeast-3 | | 96 | | Canada (Central) | ca-central-1 | :heavy_check_mark: | 97 | | China (Beijing) | cn-north-1 | | 98 | | Europe (Frankfurt) | eu-central-1 | :heavy_check_mark: | 99 | | Europe (Zurich) | eu-central-2 | | 100 | | Europe (Stockholm) | eu-north-1 | :heavy_check_mark: | 101 | | Europe (Milan) | eu-south-1 | | 102 | | Europe (Spain) | eu-south-2 | | 103 | | Europe (Ireland) | eu-west-1 | :heavy_check_mark: | 104 | | Europe (London) | eu-west-2 | :heavy_check_mark: | 105 | | Europe (Paris) | eu-west-3 | :heavy_check_mark: | 106 | | South America (São Paulo) | sa-east-1 | :heavy_check_mark: | 107 | | US East (N. 
Virginia) | us-east-1 | :heavy_check_mark: | 108 | | US East (Ohio) | us-east-2 | :heavy_check_mark: | 109 | | AWS GovCloud (US-East) | us-gov-east-1 | | 110 | | AWS GovCloud (US-West) | us-gov-west-1 | | 111 | | US West (Oregon) | us-west-2 | :heavy_check_mark: | 112 | 113 | 114 | ## Deployment and Cleanup Steps 115 | Reference to folders. 116 | * [data-exports](/data-exports) 117 | * [data-collection](/data-collection) 118 | * [case-summarization](/case-summarization) 119 | * [rls](/rls) 120 | * [security-hub](/security-hub) 121 | 122 | ## Changelogs 123 | Check [Releases](/../../releases) 124 | 125 | ## Feedback 126 | Please reference to [this page](https://catalog.workshops.aws/awscid/en-US/feedback-support) 127 | 128 | ## Contribution 129 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 130 | 131 | ## Security 132 | When you build systems on AWS infrastructure, security responsibilities are shared between you and AWS. This [shared responsibility 133 | model](https://aws.amazon.com/compliance/shared-responsibility-model/) reduces your operational burden because AWS operates, manages, and 134 | controls the components including the host operating system, the virtualization layer, and the physical security of the facilities in 135 | which the services operate. For more information about AWS security, visit [AWS Cloud Security](http://aws.amazon.com/security/). 136 | 137 | See [SECURITY](SECURITY.md) for more information. 138 | 139 | ## License 140 | This project is licensed under the Apache-2.0 License. See the [LICENSE](LICENSE) file. 141 | 142 | ## Notices 143 | Dashboards and their content: (a) are for informational purposes only, (b) represents current AWS product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from AWS and its affiliates, suppliers or licensors. AWS content, products or services are provided “as is” without warranties, representations, or conditions of any kind, whether express or implied. The responsibilities and liabilities of AWS to its customers are controlled by AWS agreements, and this document is not part of, nor does it modify, any agreement between AWS and its customers. 144 | 145 | 146 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | We take all security reports seriously. 4 | When we receive such reports, 5 | we will investigate and subsequently address 6 | any potential vulnerabilities as quickly as possible. 7 | If you discover a potential security issue in this project, 8 | please notify AWS/Amazon Security via our 9 | [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) 10 | or directly via email to [AWS Security](mailto:aws-security@amazon.com) and [CID Team](mailto:cloud-intelligence-dashboards@amazon.com). 11 | Please do *not* create a public GitHub issue in this project. 12 | 13 | Also please check Security FAQ [here](https://catalog.workshops.aws/awscid/en-US/faqs#security). 
14 | -------------------------------------------------------------------------------- /case-summarization/GUARDRAIL.md: -------------------------------------------------------------------------------- 1 | ## AWS Bedrock Guardrail 2 | Amazon Bedrock Guardrail is a crucial security feature for generative AI applications that helps implement safeguards based on specific use cases and responsible AI policies. It provides an additional layer of protection on top of the native safeguards offered by foundation models (FMs)[1][2]. 3 | 4 | ## Key Features and Importance 5 | 6 | Amazon Bedrock Guardrails offers several important security features: 7 | 8 | 1. **Content Filtering**: It helps block harmful content by evaluating both user inputs and model responses. The system can filter out content related to hate speech, insults, sexual content, violence, and misconduct[2]. 9 | 10 | 2. **Topic Restrictions**: Organizations can define specific topics to avoid, ensuring that interactions remain relevant to their business and align with company policies[2]. 11 | 12 | 3. **Sensitive Information Protection**: The system can detect and redact personally identifiable information (PII) in user inputs and model responses, helping to protect user privacy[2][3]. 13 | 14 | 4. **Custom Word Filtering**: It allows the configuration of custom words or phrases to be blocked, including profanity or specific terms like competitor names[2]. 15 | 16 | 5. **Hallucination Detection**: Contextual grounding checks help detect and filter out hallucinations in model responses, ensuring more accurate and trustworthy information[2]. 17 | 18 | ## Security Importance 19 | 20 | The importance of Amazon Bedrock Guardrails for security cannot be overstated: 21 | 22 | 1. **Enhanced Content Safety**: It can block up to 85% more harmful content compared to native FM protections, significantly improving the safety of AI applications[2]. 23 | 24 | 2. **Consistent Security Across Models**: Guardrails work with all large language models in Amazon Bedrock, providing a uniform level of security regardless of the underlying model[2]. 25 | 26 | 3. **Customizable Safeguards**: Organizations can create multiple guardrails with different configurations, tailoring security measures to specific applications and use cases[1][3]. 27 | 28 | 4. **Compliance and Responsible AI**: By allowing fine-tuned control over content and interactions, Guardrails help organizations adhere to their responsible AI policies and maintain regulatory compliance[2]. 29 | 30 | 5. **Protection Against Prompt Attacks**: The system safeguards against prompt injection and jailbreak attempts, enhancing overall security[2]. 31 | 32 | Amazon Bedrock Guardrails plays a vital role in ensuring that generative AI applications remain safe, relevant, and aligned with organizational policies. By providing robust, customizable security features, it enables businesses to leverage the power of AI while mitigating potential risks associated with harmful or inappropriate content[1][2][3]. 33 | 34 | ### Reasonable Defaults 35 | 36 | This plugin comes with the following reasonable defaults that can be overriden through the parameters exposed by the CloudFormation template: 37 | 38 | | Parameter | Description | Default | 39 | | --- | --- | --- | 40 | | BlockedInputMessage | Message to return when the Amazon Bedrock Guardrail blocks a prompt. 
| {"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""} | 41 | | BlockedOutputMessage | Message to return when the Amazon Bedrock Guardrail blocks a model response | '' | 42 | | IncludeSexualContentFilter | Whether to include Sexual Content Filter in the Guardrail or not | 'yes' | 43 | | SexualContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 44 | | SexualContentFilterOutputStrength | The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 45 | | IncludeViolentContentFilter | Whether to include Violent Content Filter in the Guardrail or not | 'yes' | 46 | | ViolentContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 47 | | ViolentContentFilterOutputStrength | The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 48 | | IncludeHateContentFilter | Whether to include Violent Content Filter in the Guardrail or not | 'yes' | 49 | | HateContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 50 | | HateContentFilterOutputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 51 | | IncludeInsultsContentFilter | Whether to include Insults Content Filter in the Guardrail or not | 'yes' | 52 | | InsultsContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 53 | | InsultsContentFilterOutputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 54 | | IncludeMisconductContentFilter | Whether to include Insults Content Filter in the Guardrail or not | 'yes' | 55 | | MisconductContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 56 | | MisconductContentFilterOutputStrength | The strength of the content filter to apply to prompts. 
As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 57 | | IncludePromptAttackContentFilter | Whether to include the Prompt Attack Content Filter in the Guardrail or not | 'yes' | 58 | | PromptAttackContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 59 | 60 | ### References & Further reading 61 | 62 | * [1] How Amazon Bedrock Guardrails works https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-how.html 63 | * [2] Generative AI Data Governance - Amazon Bedrock Guardrails - AWS https://aws.amazon.com/bedrock/guardrails/ 64 | * [3] Stop harmful content in models using Amazon Bedrock Guardrails https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html 65 | 66 | ## Usage 67 | 68 | This stack deploys a minimalistic Amazon Bedrock Guardrail that filters out any inputs or outputs that can be classified as prompt attacks, sexual content, violence, misconduct, hate speech, or insults. Any additional fine-tuning of the filters can be achieved by customizing this template. 69 | 70 | ## Support and Contribution 71 | 72 | See [CONTRIBUTING](../CONTRIBUTING.md) for more information. 73 | 74 | ## Security 75 | 76 | See [SECURITY](../SECURITY.md) for more information. 77 | 78 | ## License 79 | 80 | This project is licensed under the Apache-2.0 License. 81 | 82 | -------------------------------------------------------------------------------- /case-summarization/README.md: -------------------------------------------------------------------------------- 1 | ## AWS Support Case Summarization Plugin 2 | 3 | ### About 4 | 5 | This plugin augments the experience of the AWS Support Cases Radar, which is part of the [Cloud Intelligence Dashboards Framework](https://catalog.workshops.aws/awscid), by leveraging Generative AI powered by Amazon Bedrock to summarize AWS Support Case communications and help customers achieve operational excellence. 6 | 7 | This plugin contains the following elements: 8 | * [case-summarization](README.md) - a CloudFormation Template for deploying the AWS Support Case Summarization Plugin that integrates seamlessly with the Data Collection Framework.
9 | 10 | ### Architecture 11 | 12 | ![Architecture](images/archi.png) 13 | 14 | ### Reasonable Defaults 15 | 16 | This plugin comes with the following reasonable defaults that can be overridden through the parameters exposed by the CloudFormation template: 17 | 18 | | Parameter | Description | Default | 19 | | --- | --- | --- | 20 | | BedrockRegion | The AWS Region from which the Summarization is performed | us-east-1 | 21 | | Instructions | Additional instructions passed to the Large Language Model to customize the summarization process | '' | 22 | | Provider | Large Language Model Provider for the summarization process | Anthropic | 23 | | FoundationModel | Foundation Model to be used for the summarization process | Claude 3.5 Sonnet | 24 | | InferenceType | Summarization process Inference Type | 'ON_DEMAND' | 25 | | Temperature | Summarization process Temperature | 0 | 26 | | MaxTokens | Summarization process Maximum Tokens | 8096 | 27 | | MaxRetries | Summarization process Maximum Retries | 30 | 28 | | Timeout | Summarization process Timeout in seconds | 60 | 29 | | BatchSize | Summarization process Batch Size for parallel processing | 1 | 30 | 31 | ### Installation 32 | 33 | #### 1. Enable Amazon Bedrock Target Model Access In the Data Collection Account 34 | 35 | - See [Add or remove access to Amazon Bedrock foundation models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) for guidance. 36 | 37 | #### 2. Deploy the AWS Support Case Summarization Stack In the Data Collection Account 38 | 39 | *
 [Launch Stack >>](https://console.aws.amazon.com/cloudformation/home#/stacks/create/review?&templateURL=https://aws-managed-cost-intelligence-dashboards.s3.amazonaws.com/cfn/case-summarization/case-summarization.yaml&stackName=CidSupportCaseSummarizationStack)  
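If you prefer to deploy from the command line instead of the console button, the same template can be launched with the AWS CLI. The sketch below is illustrative only: the template URL and stack name are taken from the Launch Stack link above, while the `--capabilities` flag is an assumption (the stack may create IAM resources), so adjust it to whatever the template actually requires.

```bash
# Sketch: deploy the case-summarization stack with the AWS CLI.
# Template URL and stack name come from the Launch Stack link above;
# the IAM capability flags are an assumption - remove or change them if the
# template does not create (named) IAM resources.
aws cloudformation create-stack \
  --stack-name CidSupportCaseSummarizationStack \
  --template-url https://aws-managed-cost-intelligence-dashboards.s3.amazonaws.com/cfn/case-summarization/case-summarization.yaml \
  --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM

# Optionally wait until stack creation completes
aws cloudformation wait stack-create-complete \
  --stack-name CidSupportCaseSummarizationStack
```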
40 | 41 | 42 | ## Guardrail 43 | 44 | See [GUARDRAIL](GUARDRAIL.md) for more information. 45 | 46 | 47 | ## Support and Contribution 48 | 49 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 50 | 51 | ## Security 52 | 53 | See [SECURITY](SECURITY.md) for more information. 54 | 55 | ## Limitations 56 | 57 | As of today, the AWS Support Cases Summarization plugin does not make use of Amazon Bedrock Guardrails. See [issue](https://github.com/run-llama/llama_index/issues/17217). 58 | 59 | ## License 60 | 61 | This project is licensed under the Apache-2.0 License. 62 | -------------------------------------------------------------------------------- /case-summarization/deploy/guardrail.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: CID AWS Bedrock Guardrail Template Stack v0.0.1 3 | Metadata: 4 | AWS::CloudFormation::Interface: 5 | ParameterGroups: 6 | - Label: 7 | default: 'Amazon Bedrock Guardrail parameters' 8 | Parameters: 9 | - BlockedInputMessage 10 | - BlockedOutputMessage 11 | - IncludeSexualContentFilter 12 | - SexualContentFilterInputStrength 13 | - SexualContentFilterOutputStrength 14 | - IncludeViolentContentFilter 15 | - ViolentContentFilterInputStrength 16 | - ViolentContentFilterOutputStrength 17 | - IncludeHateContentFilter 18 | - HateContentFilterInputStrength 19 | - HateContentFilterOutputStrength 20 | - IncludeInsultsContentFilter 21 | - InsultsContentFilterInputStrength 22 | - InsultsContentFilterOutputStrength 23 | - IncludeMisconductContentFilter 24 | - MisconductContentFilterInputStrength 25 | - MisconductContentFilterOutputStrength 26 | - IncludePromptAttackContentFilter 27 | - Label: 28 | default: 'Technical parameters' 29 | Parameters: 30 | - CFDataName 31 | - ResourcePrefix 32 | 33 | Parameters: 34 | CFDataName: 35 | Type: String 36 | Description: The name of what this cf is doing. 37 | Default: bedrock-guardrail 38 | ResourcePrefix: 39 | Type: String 40 | Description: This prefix will be placed in front of all resources created. Note you may wish to add a dash at the end to make more readable (e.g. 'prefix-'). This parameter CANNOT BE UPDATED. Delete and re-create stack if needed an update. 41 | Default: "CID-DC-" 42 | BlockedInputMessage: 43 | Type: String 44 | Description: Message to return when the Amazon Bedrock Guardrail blocks a prompt. 45 | MaxLength: 500 46 | Default: '{"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""}' 47 | BlockedOutputMessage: 48 | Type: String 49 | Description: Message to return when the Amazon Bedrock Guardrail blocks a model response. 50 | MaxLength: 500 51 | Default: '{"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""}' 52 | IncludeSexualContentFilter: 53 | Type: String 54 | Description: "Whether to include Sexual Content Filter in the Guardrail or not" 55 | AllowedValues: ['yes', 'no'] 56 | Default: 'yes' 57 | SexualContentFilterInputStrength: 58 | Type: String 59 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 
60 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 61 | Default: 'HIGH' 62 | SexualContentFilterOutputStrength: 63 | Type: String 64 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 65 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 66 | Default: 'HIGH' 67 | IncludeViolentContentFilter: 68 | Type: String 69 | Description: "Whether to include Violent Content Filter in the Guardrail or not" 70 | AllowedValues: ['yes', 'no'] 71 | Default: 'yes' 72 | ViolentContentFilterInputStrength: 73 | Type: String 74 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 75 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 76 | Default: 'HIGH' 77 | ViolentContentFilterOutputStrength: 78 | Type: String 79 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 80 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 81 | Default: 'HIGH' 82 | IncludeHateContentFilter: 83 | Type: String 84 | Description: "Whether to include Violent Content Filter in the Guardrail or not" 85 | AllowedValues: ['yes', 'no'] 86 | Default: 'yes' 87 | HateContentFilterInputStrength: 88 | Type: String 89 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 90 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 91 | Default: 'HIGH' 92 | HateContentFilterOutputStrength: 93 | Type: String 94 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 95 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 96 | Default: 'HIGH' 97 | IncludeInsultsContentFilter: 98 | Type: String 99 | Description: "Whether to include Insults Content Filter in the Guardrail or not" 100 | AllowedValues: ['yes', 'no'] 101 | Default: 'yes' 102 | InsultsContentFilterInputStrength: 103 | Type: String 104 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 105 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 106 | Default: 'HIGH' 107 | InsultsContentFilterOutputStrength: 108 | Type: String 109 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 
110 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 111 | Default: 'HIGH' 112 | IncludeMisconductContentFilter: 113 | Type: String 114 | Description: "Whether to include Misconduct Content Filter in the Guardrail or not" 115 | AllowedValues: ['yes', 'no'] 116 | Default: 'yes' 117 | MisconductContentFilterInputStrength: 118 | Type: String 119 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 120 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 121 | Default: 'HIGH' 122 | MisconductContentFilterOutputStrength: 123 | Type: String 124 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 125 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 126 | Default: 'HIGH' 127 | IncludePromptAttackContentFilter: 128 | Type: String 129 | Description: "Whether to include Prompt Attack Content Filter in the Guardrail or not" 130 | AllowedValues: ['yes', 'no'] 131 | Default: 'yes' 132 | PromptAttackContentFilterInputStrength: 133 | Type: String 134 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 135 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 136 | Default: 'HIGH' 137 | 138 | Conditions: 139 | DeploySexualContentFilter: !Equals [ !Ref IncludeSexualContentFilter, "yes"] 140 | DeployViolentContentFilter: !Equals [ !Ref IncludeViolentContentFilter, "yes"] 141 | DeployHateContentFilter: !Equals [ !Ref IncludeHateContentFilter, "yes"] 142 | DeployInsultsContentFilter: !Equals [ !Ref IncludeInsultsContentFilter, "yes"] 143 | DeployMisconductContentFilter: !Equals [ !Ref IncludeMisconductContentFilter, "yes"] 144 | DeployPromptAttackContentFilter: !Equals [ !Ref IncludePromptAttackContentFilter, "yes"] 145 | 146 | Resources: 147 | BedrockGuardrail: 148 | Type: AWS::Bedrock::Guardrail 149 | Properties: 150 | Name: !Sub "${ResourcePrefix}${CFDataName}" 151 | Description: Amazon Bedrock Guardrail 152 | BlockedInputMessaging: !Ref BlockedInputMessage 153 | BlockedOutputsMessaging: !Ref BlockedOutputMessage 154 | ContentPolicyConfig: 155 | FiltersConfig: 156 | - !If 157 | - DeploySexualContentFilter 158 | - InputStrength: !Ref SexualContentFilterInputStrength 159 | OutputStrength: !Ref SexualContentFilterOutputStrength 160 | Type: SEXUAL 161 | - !Ref AWS::NoValue 162 | - !If 163 | - DeployViolentContentFilter 164 | - InputStrength: !Ref ViolentContentFilterInputStrength 165 | OutputStrength: !Ref ViolentContentFilterOutputStrength 166 | Type: VIOLENCE 167 | - !Ref AWS::NoValue 168 | - !If 169 | - DeployHateContentFilter 170 | - InputStrength: !Ref HateContentFilterInputStrength 171 | OutputStrength: !Ref HateContentFilterOutputStrength 172 | Type: HATE 173 | - !Ref AWS::NoValue 174 | - !If 175 | - DeployInsultsContentFilter 176 | - InputStrength: !Ref InsultsContentFilterInputStrength 177 | OutputStrength: !Ref InsultsContentFilterOutputStrength 178 | Type: INSULTS 179 | - !Ref AWS::NoValue 180 | - !If 181 | - DeployMisconductContentFilter 182 | - InputStrength: !Ref MisconductContentFilterInputStrength 183 | OutputStrength: !Ref 
MisconductContentFilterOutputStrength 184 | Type: MISCONDUCT 185 | - !Ref AWS::NoValue 186 | - !If 187 | - DeployPromptAttackContentFilter 188 | - InputStrength: !Ref PromptAttackContentFilterInputStrength 189 | OutputStrength: 'NONE' 190 | Type: PROMPT_ATTACK 191 | - !Ref AWS::NoValue 192 | WordPolicyConfig: 193 | ManagedWordListsConfig: 194 | - Type: PROFANITY 195 | Metadata: 196 | cfn-lint: 197 | config: 198 | ignore_checks: 199 | - E3032 # Guardrail FiltersConfig Variabilization False Positive 200 | 201 | BedrockGuardrailVersion: 202 | Type: AWS::Bedrock::GuardrailVersion 203 | Properties: 204 | Description: Amazon Bedrock Guardrail 205 | GuardrailIdentifier: !Ref BedrockGuardrail 206 | -------------------------------------------------------------------------------- /case-summarization/images/archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/case-summarization/images/archi.png -------------------------------------------------------------------------------- /case-summarization/layer/build-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script builds the Lambda Layer that contains Pydantic & Llama_index 3 | 4 | git_root=$(git rev-parse --show-toplevel) 5 | # shellcheck disable=SC2155 disable=SC2002 6 | export version=$(cat "${git_root}/case-summarization/utils/version.json" | jq .version --raw-output) 7 | export prefix='llm' 8 | cd "$(dirname "$0")" || exit 9 | 10 | function build_layer { 11 | echo 'Building a layer' 12 | rm -rf ./python 13 | mkdir -p ./python 14 | python3 -m pip install --only-binary=:all: --platform=manylinux2014_x86_64 --target=./python --requirement=./requirements.txt 15 | du -sh ./python # must be less then 256M 16 | rm -rf "$prefix-$version.zip" 17 | zip -qr "$prefix-$version.zip" ./python 18 | ls -h -l "$prefix-$version.zip" 19 | rm -rf ./python 20 | } 21 | 22 | build_layer 1>&2 23 | 24 | ls "$prefix-$version.zip" -------------------------------------------------------------------------------- /case-summarization/layer/publish-lambda-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script can be used for release or testing of lambda layers upload. 3 | 4 | # First build layer 5 | git_root=$(git rev-parse --show-toplevel) 6 | cd "${git_root}/case-summarization/layer/" || exit 7 | layer=$(./build-layer.sh) 8 | 9 | # Then publish on s3 10 | export AWS_REGION=us-east-1 11 | export STACK_SET_NAME=LayerBuckets 12 | aws cloudformation list-stack-instances \ 13 | --stack-set-name $STACK_SET_NAME \ 14 | --query 'Summaries[].[StackId,Region]' \ 15 | --output text | 16 | while read -r stack_id region; do 17 | echo "uploading $layer to $region" 18 | # shellcheck disable=SC2016 19 | bucket=$(aws cloudformation list-stack-resources --stack-name "$stack_id" \ 20 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 21 | --region "$region" --output text) 22 | # shellcheck disable=SC2181 23 | output=$(aws s3api put-object \ 24 | --bucket "$bucket" \ 25 | --key "cid-llm-lambda-layer/$layer" \ 26 | --body "./$layer") 27 | # shellcheck disable=SC2181 disable=SC2002 28 | if [ $? 
-ne 0 ]; then 29 | echo "Error: $output" 30 | else 31 | echo "Uploaded successfuly" 32 | fi 33 | done 34 | 35 | echo 'Cleanup' 36 | rm -vf "./$layer" 37 | 38 | echo 'Done' -------------------------------------------------------------------------------- /case-summarization/layer/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index-llms-bedrock>=0.3.3 2 | pydantic-core 3 | -------------------------------------------------------------------------------- /case-summarization/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import git 4 | import json 5 | 6 | repo = git.Repo('.') 7 | 8 | print(repo.git.execute('git checkout main'.split())) 9 | print(repo.git.execute('git pull'.split())) 10 | 11 | 12 | old_ver = json.load(open("data-collection/utils/version.json"))['version'] 13 | 14 | print (old_ver) 15 | bump='patch' 16 | if len(sys.argv)>1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump=='patch': 22 | new_ver = '.'.join(map(str,[maj, minor, patch + 1])) 23 | elif bump=='minor': 24 | new_ver = '.'.join(map(str,[maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = open("data-collection/utils/version.json").read() 32 | with open("data-collection/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver,new_ver)) 34 | 35 | 36 | filenames = [ 37 | 'data-collection/deploy/deploy-data-read-permissions.yaml', 38 | 'data-collection/deploy/deploy-data-collection.yaml', 39 | 'data-collection/deploy/deploy-in-management-account.yaml', 40 | 'data-collection/deploy/deploy-in-linked-account.yaml', 41 | "data-collection/utils/version.json", 42 | ] 43 | for filename in filenames: 44 | tx = open(filename).read() 45 | with open(filename, "w") as f: 46 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 47 | 48 | 49 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 50 | 51 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 52 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 53 | -------------------------------------------------------------------------------- /case-summarization/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 6 | 7 | code_path=$(git rev-parse --show-toplevel)/case-summarization/deploy 8 | 9 | echo "sync to central bucket" 10 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/case-summarization/ -------------------------------------------------------------------------------- /case-summarization/utils/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script uploads CloudFormation files to S3 bucket. Can be used with any testing bucket or prod. 4 | # see also README.md 5 | 6 | if [ -n "$1" ]; then 7 | bucket=$1 8 | else 9 | echo "ERROR: First parameter not supplied. Provide a bucket name. 
aws-well-architected-labs for prod aws-wa-labs-staging for stage " 10 | echo " prod aws-well-architected-labs " 11 | exit 1 12 | fi 13 | code_path=$(git rev-parse --show-toplevel)/case-summarization/deploy 14 | 15 | echo "Sync to $bucket" 16 | aws s3 sync $code_path/ s3://$bucket/cfn/case-summarization/ 17 | echo 'Done' 18 | -------------------------------------------------------------------------------- /case-summarization/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1" 3 | } -------------------------------------------------------------------------------- /data-collection/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | Starting from release v3.0.4, all release notes are captured in https://github.com/awslabs/cid-framework/releases 4 | 5 | ## v3.0.2 6 | Bug fixes 7 | 8 | ## v3.0.1 9 | Improved performance of Data Collection for Cost Optimization Hub 10 | 11 | ## v3.0.0 12 | Data Collection went through a major refactoring and improvement. The data collection mechanism now leverages Step Functions, and collection was added for new services such as AWS Cost Optimization Hub and AWS Backup 13 | 14 | ## v2.1 15 | Added a new service: AWS Cost Anomaly Detection 16 | 17 | ## v2 18 | Data Collection was updated to collect data from multiple AWS Organizations 19 | 20 | ## v1 21 | Data Collection lab was published on the Well-Architected Labs site 22 | -------------------------------------------------------------------------------- /data-collection/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTION GUIDE 2 | 3 | # Development process 4 | 5 | ## Testing environment 6 | You can test this lab in a dedicated account that preferably has the following assets: 7 | * EC2 instances running for more than 14 days (for Compute Optimizer and CE Rightsizing) 8 | * At least one EBS volume and one volume snapshot 9 | * At least one custom AMI created from one of the snapshots 10 | * Activated Enterprise Support (for the TA module) 11 | * An RDS cluster or single instance 12 | * An ECS cluster with one service deployed ([wordpress](https://aws.amazon.com/blogs/containers/running-wordpress-amazon-ecs-fargate-ecs/) will work fine) 13 | * A Transit Gateway with at least one attachment 14 | * An AWS Organization with trusted access enabled (see [Activate trusted access with AWS Organizations](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/stacksets-orgs-activate-trusted-access.html)) 15 | * An S3 bucket to store the CloudFormation templates that deploy the infrastructure for the optimization data collection components 16 | 17 | ## Prerequisites for local environment 18 | 19 | ### General 20 | 21 | * [cfn_nag_scan](https://github.com/stelligent/cfn_nag#installation) 22 | * python3.9+ 23 | * `pip3 install -U boto3 pytest cfn-flip pylint checkov` 24 | * Configured AWS credentials 25 | * Install and configure [git-secrets](https://github.com/awslabs/git-secrets#installing-git-secrets) 26 | 27 | ## Testing 28 | 29 | ### AWS access credentials 30 | 31 | For the purpose of testing, the Python and shell scripts make use of the default AWS credentials set up in your ~/.aws folder. 32 | 33 | Make sure you configure credentials for an organization's management account that has the necessary permissions to retrieve information from itself and from the member accounts. 
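A minimal sanity check (a sketch, not part of the repository scripts; it assumes the default profile is the one you will use for testing) is to confirm that the configured credentials resolve to the management account and can enumerate the member accounts:

```bash
# Both commands are standard AWS CLI calls that only read identity and
# organization metadata, so they are safe to run against any account.
aws sts get-caller-identity --query "Account" --output text        # should print the management account ID
aws organizations list-accounts --query "Accounts[].Id" --output text  # fails if the credentials cannot read the organization
```

If the second command returns an AccessDenied error, the credentials most likely do not belong to (or cannot read from) the organization's management account.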
34 | 35 | `aws configure` can be used to set up the AWS credentials in your local environment. 36 | 37 | ### Steps 38 | 39 | 1. (One time) Clone the project locally and install dependencies 40 | 41 | ```bash 42 | git clone git@github.com:awslabs/cid-framework.git 43 | cd cid-framework 44 | pip3 install -U boto3 pytest cfn-flip pylint bandit cfn-lint checkov 45 | ``` 46 | 47 | Create a test bucket in the test account. You can use any bucket. 48 | 49 | ```bash 50 | export account_id=$(aws sts get-caller-identity --query "Account" --output text ) 51 | export bucket=cid-$account_id-test 52 | 53 | aws s3api create-bucket --bucket $bucket 54 | ``` 55 | 56 | 2. Check the quality of the code: 57 | 58 | CloudFormation: 59 | ```bash 60 | ./utils/lint.sh 61 | ``` 62 | 63 | Pylint: 64 | ```bash 65 | python3 ./utils/pylint.py 66 | ``` 67 | 68 | 69 | 3. Upload the code to a bucket and run integration tests in your testing environment 70 | 71 | ```bash 72 | ./test/run-test-from-scratch.sh --no-teardown 73 | ``` 74 | 75 | The test installs the stacks from scratch in a single account and then checks for the presence of the Athena tables. After running, it deletes the stacks and all artifacts that are not deleted by CloudFormation. You can skip the teardown by providing the `--no-teardown` flag. 76 | 77 | 4. Create a pull request. 78 | 79 | 80 | # Release process (CID Team only) 81 | All CloudFormation templates are uploaded to the `aws-managed-cost-intelligence-dashboards*` buckets. 82 | 83 | ```bash 84 | ./data-collection/utils/release.sh 85 | ``` 86 | -------------------------------------------------------------------------------- /data-collection/README.md: -------------------------------------------------------------------------------- 1 | ## CID Data Collection 2 | 3 | ### About 4 | 5 | This project demonstrates the usage of AWS APIs for collecting various types of usage data. 6 | 7 | For deployment and additional information, refer to the [documentation](https://catalog.workshops.aws/awscid/data-collection). 8 | 9 | [![Documentation](/.images/documentation.svg)](https://catalog.workshops.aws/awscid/data-collection) 10 | 11 | 12 | ### Architecture 13 | 14 | ![Architecture](/.images/architecture-data-collection-detailed.png) 15 | 16 | 1. An [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rule invokes [AWS Step Functions](https://aws.amazon.com/step-functions/) for every deployed data collection module on a schedule. 17 | 2. The Step Function launches an [AWS Lambda](https://aws.amazon.com/lambda/) function, **Account Collector**, that assumes the **Read Role** in the Management accounts and retrieves the list of linked accounts via the [AWS Organizations API](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_integrate_services.html). 18 | 3. Step Functions launches the **Data Collection Lambda** function for each collected account. 19 | 4. Each data collection module Lambda function assumes an [IAM](https://aws.amazon.com/iam/) role in the linked accounts and retrieves the respective optimization data via the [AWS SDK for Python (Boto3)](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html). Retrieved data is aggregated in an [Amazon S3](https://aws.amazon.com/s3/) bucket. 20 | 5. Once the data is stored in the S3 bucket, Step Functions triggers an [AWS Glue](https://aws.amazon.com/glue/) crawler, which creates or updates the table in the [AWS Glue Data Catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#data-catalog-intro). 21 | 6. 
Collected data is visualized with the [Cloud Intelligence Dashboards](https://aws.amazon.com/solutions/implementations/cloud-intelligence-dashboards/) using [Amazon QuickSight](https://aws.amazon.com/quicksight/) to get optimization recommendations and insights. 22 | 23 | 24 | ### Modules 25 | List of modules and objects collected: 26 | | Module Name | AWS Services | Collected In | Details | 27 | | --- | --- | --- | --- | 28 | | `organization` | [AWS Organizations](https://aws.amazon.com/organizations/) | Management Accounts | | 29 | | `budgets` | [AWS Budgets](https://aws.amazon.com/aws-cost-management/aws-budgets/) | Linked Accounts | | 30 | | `compute-optimizer` | [AWS Compute Optimizer](https://aws.amazon.com/compute-optimizer/) | Management Accounts | Requires [Enablement of Compute Optimizer](https://aws.amazon.com/compute-optimizer/getting-started/#:~:text=Opt%20in%20for%20Compute%20Optimizer,created%20automatically%20in%20your%20account.) | 31 | | `trusted-advisor` | [AWS Trusted Advisor](https://aws.amazon.com/premiumsupport/technology/trusted-advisor/) | Linked Accounts | Requires Business, Enterprise or On-Ramp Support Level | 32 | | `support-cases` | [AWS Support](https://aws.amazon.com/premiumsupport/) | Linked Accounts | Requires Business, Enterprise On-Ramp, or Enterprise Support plan | 33 | | `cost-explorer-cost-anomaly` | [AWS Cost Anomaly Detection](https://aws.amazon.com/aws-cost-management/aws-cost-anomaly-detection/) | Management Accounts | | 34 | | `cost-explorer-rightsizing` | [AWS Cost Explorer](https://aws.amazon.com/aws-cost-management/aws-cost-explorer/) | Management Accounts | DEPRECATED. Please use `Data Exports` for `Cost Optimization Hub` | 35 | | `inventory` | Various services | Linked Accounts | Collects `Amazon OpenSearch Domains`, `Amazon ElastiCache Clusters`, `RDS DB Instances`, `EBS Volumes`, `AMI`, `EC2 Instances`, `EBS Snapshot`, `RDS Snapshot`, `Lambda`, `RDS DB Clusters`, `EKS Clusters` | 36 | | `pricing` | Various services | Data Collection Account | Collects pricing for `Amazon RDS`, `Amazon EC2`, `Amazon ElastiCache`, `AWS Lambda`, `Amazon OpenSearch`, `AWS Compute Savings Plan` | 37 | | `rds-usage` | [Amazon RDS](https://aws.amazon.com/rds/) | Linked Accounts | Collects CloudWatch metrics for chargeback | 38 | | `transit-gateway` | [AWS Transit Gateway](https://aws.amazon.com/transit-gateway/) | Linked Accounts | Collects CloudWatch metrics for chargeback | 39 | | `ecs-chargeback` | [Amazon ECS](https://aws.amazon.com/ecs/) | Linked Accounts | | 40 | | `backup` | [AWS Backup](https://aws.amazon.com/backup/) | Management Accounts | Collects Backup Restore and Copy Jobs. Requires [activation of cross-account](https://docs.aws.amazon.com/aws-backup/latest/devguide/manage-cross-account.html#enable-cross-account) | 41 | | `health-events` | [AWS Health](https://aws.amazon.com/health/) | Management Accounts | Collect AWS Health notifications via AWS Organizational view | 42 | | `licence-manager` | [AWS License Manager](https://aws.amazon.com/license-manager/) | Management Accounts | Collect Licenses and Grants | 43 | | `aws-feeds` | N/A | Data Collection Account | Collects Blog posts and News Feeds | 44 | | `quicksight` | [Amazon QuickSight](https://aws.amazon.com/quicksight/) | Data Collection Account | Collects QuickSight User and Group information in the Data Collection Account only | 45 | 46 | 47 | ### Deployment Overview 48 | 49 | ![Deployment Architecture](/.images/architecture-data-collection-deploy.png) 50 | 51 | 1. 
Deploy the Advanced Data Collection Permissions CloudFormation stack to Management (Payer) AWS Account. The Permissions CloudFormation stack in the Management (Payer) Account also deploys Permissions stacks to each of Linked accounts via StackSets. 52 | 53 | 2. Deploy the Data Collection Stack to the Data Collection AWS Account 54 | 55 | 56 | For deployment and further information please reference to this [documentation](https://catalog.workshops.aws/awscid/data-collection). 57 | 58 | [![Documentation](/.images/documentation.svg)](https://catalog.workshops.aws/awscid/data-collection) 59 | 60 | 61 | ### Contributing 62 | See [CONTRIBUTING.md](CONTRIBUTING.md) 63 | -------------------------------------------------------------------------------- /data-collection/deploy/data/rds_graviton_mapping.csv: -------------------------------------------------------------------------------- 1 | Aurora,PostgreSQL,db.t2.medium,db.t4g.medium 2 | Aurora,PostgreSQL,db.t3.medium,None 3 | Aurora,PostgreSQL,db.t3.large,db.t4g.large 4 | Aurora,PostgreSQL,db.r4.large,db.r6g.large 5 | Aurora,PostgreSQL,db.r4.xlarge,db.r6g.xlarge 6 | Aurora,PostgreSQL,db.r4.2xlarge,db.r6g.2xlarge 7 | Aurora,PostgreSQL,db.r4.4xlarge,db.r6g.4xlarge 8 | Aurora,PostgreSQL,db.r4.8xlarge,db.r6g.8xlarge 9 | Aurora,PostgreSQL,db.r4.16xlarge,db.r6g.16xlarge 10 | Aurora,PostgreSQL,db.r5.large,db.r6g.large 11 | Aurora,PostgreSQL,db.r5.xlarge,db.r6g.xlarge 12 | Aurora,PostgreSQL,db.r5.2xlarge,db.r6g.2xlarge 13 | Aurora,PostgreSQL,db.r5.4xlarge,db.r6g.4xlarge 14 | Aurora,PostgreSQL,db.r5.8xlarge,db.r6g.8xlarge 15 | Aurora,PostgreSQL,db.r5.12xlarge,db.r6g.12xlarge 16 | Aurora,PostgreSQL,db.r5.16xlarge,db.r6g.16xlarge 17 | Aurora,PostgreSQL,db.r5.24xlarge,None 18 | Aurora,MySQL,db.t2.small,None 19 | Aurora,MySQL,db.t2.medium,db.t4g.medium 20 | Aurora,MySQL,db.t3.small,None 21 | Aurora,MySQL,db.t3.medium,db.t4g.medium 22 | Aurora,MySQL,db.t3.large,db.t4g.large 23 | Aurora,MySQL,db.r3.large,db.r6g.large 24 | Aurora,MySQL,db.r3.xlarge,db.r6g.xlarge 25 | Aurora,MySQL,db.r3.2xlarge,db.r6g.2xlarge 26 | Aurora,MySQL,db.r3.4xlarge,db.r6g.4xlarge 27 | Aurora,MySQL,db.r3.8xlarge,db.r6g.8xlarge 28 | Aurora,MySQL,db.r4.large,db.r6g.large 29 | Aurora,MySQL,db.r4.xlarge,db.r6g.xlarge 30 | Aurora,MySQL,db.r4.2xlarge,db.r6g.2xlarge 31 | Aurora,MySQL,db.r4.4xlarge,db.r6g.4xlarge 32 | Aurora,MySQL,db.r4.8xlarge,db.r6g.8xlarge 33 | Aurora,MySQL,db.r4.16xlarge,db.r6g.16xlarge 34 | Aurora,MySQL,db.r5.large,db.r6g.large 35 | Aurora,MySQL,db.r5.xlarge,db.r6g.xlarge 36 | Aurora,MySQL,db.r5.2xlarge,db.r6g.2xlarge 37 | Aurora,MySQL,db.r5.4xlarge,db.r6g.4xlarge 38 | Aurora,MySQL,db.r5.8xlarge,db.r6g.8xlarge 39 | Aurora,MySQL,db.r5.12xlarge,db.r6g.12xlarge 40 | Aurora,MySQL,db.r5.16xlarge,db.r6g.16xlarge 41 | Aurora,MySQL,db.r5.24xlarge,None 42 | AmazonRDS,PostgreSQL,db.t2.micro,db.t4g.micro 43 | AmazonRDS,PostgreSQL,db.t2.small,db.t4g.small 44 | AmazonRDS,PostgreSQL,db.t2.medium,db.t4g.medium 45 | AmazonRDS,PostgreSQL,db.t2.large,db.t4g.large 46 | AmazonRDS,PostgreSQL,db.t2.xlarge,db.t4g.xlarge 47 | AmazonRDS,PostgreSQL,db.t2.2xlarge,db.t4g.2xlarge 48 | AmazonRDS,PostgreSQL,db.t3.micro,db.t4g.micro 49 | AmazonRDS,PostgreSQL,db.t3.small,db.t4g.small 50 | AmazonRDS,PostgreSQL,db.t3.medium,db.t4g.medium 51 | AmazonRDS,PostgreSQL,db.t3.large,db.t4g.large 52 | AmazonRDS,PostgreSQL,db.t3.xlarge,db.t4g.xlarge 53 | AmazonRDS,PostgreSQL,db.t3.2xlarge,db.t4g.2xlarge 54 | AmazonRDS,PostgreSQL,db.m3.medium,None 55 | AmazonRDS,PostgreSQL,db.m3.large,db.m6g.large 56 | 
AmazonRDS,PostgreSQL,db.m3.xlarge,db.m6g.xlarge 57 | AmazonRDS,PostgreSQL,db.m3.2xlarge,db.m6g.2xlarge 58 | AmazonRDS,PostgreSQL,db.m3.4xlarge,db.m6g.4xlarge 59 | AmazonRDS,PostgreSQL,db.m3.10xlarge,db.m6g.10xlarge 60 | AmazonRDS,PostgreSQL,db.m3.16xlarge,db.m6g.16xlarge 61 | AmazonRDS,PostgreSQL,db.m4.large,db.m6g.large 62 | AmazonRDS,PostgreSQL,db.m4.xlarge,db.m6g.xlarge 63 | AmazonRDS,PostgreSQL,db.m4.2xlarge,db.m6g.2xlarge 64 | AmazonRDS,PostgreSQL,db.m4.4xlarge,db.m6g.4xlarge 65 | AmazonRDS,PostgreSQL,db.m4.10xlarge,db.m6g.10xlarge 66 | AmazonRDS,PostgreSQL,db.m4.16xlarge,db.m6g.16xlarge 67 | AmazonRDS,PostgreSQL,db.m5.large,db.m6g.large 68 | AmazonRDS,PostgreSQL,db.m5.xlarge,db.m6g.xlarge 69 | AmazonRDS,PostgreSQL,db.m5.2xlarge,db.m6g.2xlarge 70 | AmazonRDS,PostgreSQL,db.m5.4xlarge,db.m6g.4xlarge 71 | AmazonRDS,PostgreSQL,db.m5.8xlarge,db.m6g.8xlarge 72 | AmazonRDS,PostgreSQL,db.m5.12xlarge,db.m6g.12xlarge 73 | AmazonRDS,PostgreSQL,db.m5.16xlarge,db.m6g.16xlarge 74 | AmazonRDS,PostgreSQL,db.m5.24xlarge,None 75 | AmazonRDS,PostgreSQL,db.r3.large,db.r6g.large 76 | AmazonRDS,PostgreSQL,db.r3.xlarge,db.r6g.xlarge 77 | AmazonRDS,PostgreSQL,db.r3.2xlarge,db.r6g.2xlarge 78 | AmazonRDS,PostgreSQL,db.r3.4xlarge,db.r6g.4xlarge 79 | AmazonRDS,PostgreSQL,db.r3.8xlarge,db.r6g.8xlarge 80 | AmazonRDS,PostgreSQL,db.r4.large,db.r6g.large 81 | AmazonRDS,PostgreSQL,db.r4.xlarge,db.r6g.xlarge 82 | AmazonRDS,PostgreSQL,db.r4.2xlarge,db.r6g.2xlarge 83 | AmazonRDS,PostgreSQL,db.r4.4xlarge,db.r6g.4xlarge 84 | AmazonRDS,PostgreSQL,db.r4.8xlarge,db.r6g.8xlarge 85 | AmazonRDS,PostgreSQL,db.r4.16xlarge,db.r6g.16xlarge 86 | AmazonRDS,PostgreSQL,db.r5.large,db.r6g.large 87 | AmazonRDS,PostgreSQL,db.r5.xlarge,db.r6g.xlarge 88 | AmazonRDS,PostgreSQL,db.r5.2xlarge,db.r6g.2xlarge 89 | AmazonRDS,PostgreSQL,db.r5.4xlarge,db.r6g.4xlarge 90 | AmazonRDS,PostgreSQL,db.r5.8xlarge,db.r6g.8xlarge 91 | AmazonRDS,PostgreSQL,db.r5.12xlarge,db.r6g.12xlarge 92 | AmazonRDS,PostgreSQL,db.r5.16xlarge,db.r6g.16xlarge 93 | AmazonRDS,PostgreSQL,db.r5.24xlarge,None 94 | AmazonRDS,PostgreSQL,db.r5b.large,db.r6gd.large 95 | AmazonRDS,PostgreSQL,db.r5b.xlarge,db.r6gd.xlarge 96 | AmazonRDS,PostgreSQL,db.r5b.2xlarge,db.r6gd.2xlarge 97 | AmazonRDS,PostgreSQL,db.r5b.4xlarge,db.r6gd.4xlarge 98 | AmazonRDS,PostgreSQL,db.r5b.8xlarge,db.r6gd.8xlarge 99 | AmazonRDS,PostgreSQL,db.r5b.12xlarge,db.r6gd.12xlarge 100 | AmazonRDS,PostgreSQL,db.r5b.16xlarge,db.r6gd.16xlarge 101 | AmazonRDS,PostgreSQL,db.r5b.24xlarge,None 102 | AmazonRDS,MySQL,db.t2.micro,db.t4g.micro 103 | AmazonRDS,MySQL,db.t2.small,db.t4g.small 104 | AmazonRDS,MySQL,db.t2.medium,db.t4g.medium 105 | AmazonRDS,MySQL,db.t2.large,db.t4g.large 106 | AmazonRDS,MySQL,db.t2.xlarge,db.t4g.xlarge 107 | AmazonRDS,MySQL,db.t2.2xlarge,db.t4g.2xlarge 108 | AmazonRDS,MySQL,db.t3.micro,db.t4g.micro 109 | AmazonRDS,MySQL,db.t3.small,db.t4g.small 110 | AmazonRDS,MySQL,db.t3.medium,db.t4g.medium 111 | AmazonRDS,MySQL,db.t3.large,db.t4g.large 112 | AmazonRDS,MySQL,db.t3.xlarge,db.t4g.xlarge 113 | AmazonRDS,MySQL,db.t3.2xlarge,db.t4g.2xlarge 114 | AmazonRDS,MySQL,db.m3.medium,None 115 | AmazonRDS,MySQL,db.m3.large,db.m6g.large 116 | AmazonRDS,MySQL,db.m3.xlarge,db.m6g.xlarge 117 | AmazonRDS,MySQL,db.m3.2xlarge,db.m6g.2xlarge 118 | AmazonRDS,MySQL,db.m3.4xlarge,db.m6g.4xlarge 119 | AmazonRDS,MySQL,db.m3.10xlarge,db.m6g.10xlarge 120 | AmazonRDS,MySQL,db.m3.16xlarge,db.m6g.16xlarge 121 | AmazonRDS,MySQL,db.m4.large,db.m6g.large 122 | AmazonRDS,MySQL,db.m4.xlarge,db.m6g.xlarge 123 | 
AmazonRDS,MySQL,db.m4.2xlarge,db.m6g.2xlarge 124 | AmazonRDS,MySQL,db.m4.4xlarge,db.m6g.4xlarge 125 | AmazonRDS,MySQL,db.m4.10xlarge,db.m6g.10xlarge 126 | AmazonRDS,MySQL,db.m4.16xlarge,db.m6g.16xlarge 127 | AmazonRDS,MySQL,db.m5.large,db.m6g.large 128 | AmazonRDS,MySQL,db.m5.xlarge,db.m6g.xlarge 129 | AmazonRDS,MySQL,db.m5.2xlarge,db.m6g.2xlarge 130 | AmazonRDS,MySQL,db.m5.4xlarge,db.m6g.4xlarge 131 | AmazonRDS,MySQL,db.m5.8xlarge,db.m6g.8xlarge 132 | AmazonRDS,MySQL,db.m5.12xlarge,db.m6g.12xlarge 133 | AmazonRDS,MySQL,db.m5.16xlarge,db.m6g.16xlarge 134 | AmazonRDS,MySQL,db.m5.24xlarge,None 135 | AmazonRDS,MySQL,db.r3.large,db.r6g.large 136 | AmazonRDS,MySQL,db.r3.xlarge,db.r6g.xlarge 137 | AmazonRDS,MySQL,db.r3.2xlarge,db.r6g.2xlarge 138 | AmazonRDS,MySQL,db.r3.4xlarge,db.r6g.4xlarge 139 | AmazonRDS,MySQL,db.r3.8xlarge,db.r6g.8xlarge 140 | AmazonRDS,MySQL,db.r4.large,db.r6g.large 141 | AmazonRDS,MySQL,db.r4.xlarge,db.r6g.xlarge 142 | AmazonRDS,MySQL,db.r4.2xlarge,db.r6g.2xlarge 143 | AmazonRDS,MySQL,db.r4.4xlarge,db.r6g.4xlarge 144 | AmazonRDS,MySQL,db.r4.8xlarge,db.r6g.8xlarge 145 | AmazonRDS,MySQL,db.r4.16xlarge,db.r6g.16xlarge 146 | AmazonRDS,MySQL,db.r5.large,db.r6g.large 147 | AmazonRDS,MySQL,db.r5.xlarge,db.r6g.xlarge 148 | AmazonRDS,MySQL,db.r5.2xlarge,db.r6g.2xlarge 149 | AmazonRDS,MySQL,db.r5.4xlarge,db.r6g.4xlarge 150 | AmazonRDS,MySQL,db.r5.8xlarge,db.r6g.8xlarge 151 | AmazonRDS,MySQL,db.r5.12xlarge,db.r6g.12xlarge 152 | AmazonRDS,MySQL,db.r5.16xlarge,db.r6g.16xlarge 153 | AmazonRDS,MySQL,db.r5.24xlarge,None 154 | AmazonRDS,MySQL,db.r5b.large,db.r6gd.large 155 | AmazonRDS,MySQL,db.r5b.xlarge,db.r6gd.xlarge 156 | AmazonRDS,MySQL,db.r5b.2xlarge,db.r6gd.2xlarge 157 | AmazonRDS,MySQL,db.r5b.4xlarge,db.r6gd.4xlarge 158 | AmazonRDS,MySQL,db.r5b.8xlarge,db.r6gd.8xlarge 159 | AmazonRDS,MySQL,db.r5b.12xlarge,db.r6gd.12xlarge 160 | AmazonRDS,MySQL,db.r5b.16xlarge,db.r6gd.16xlarge 161 | AmazonRDS,MySQL,db.r5b.24xlarge,None 162 | AmazonRDS,MariaDB,db.t2.micro,db.t4g.micro 163 | AmazonRDS,MariaDB,db.t2.small,db.t4g.small 164 | AmazonRDS,MariaDB,db.t2.medium,db.t4g.medium 165 | AmazonRDS,MariaDB,db.t2.large,db.t4g.large 166 | AmazonRDS,MariaDB,db.t2.xlarge,db.t4g.xlarge 167 | AmazonRDS,MariaDB,db.t2.2xlarge,db.t4g.2xlarge 168 | AmazonRDS,MariaDB,db.t3.micro,db.t4g.micro 169 | AmazonRDS,MariaDB,db.t3.small,db.t4g.small 170 | AmazonRDS,MariaDB,db.t3.medium,db.t4g.medium 171 | AmazonRDS,MariaDB,db.t3.large,db.t4g.large 172 | AmazonRDS,MariaDB,db.t3.xlarge,db.t4g.xlarge 173 | AmazonRDS,MariaDB,db.t3.2xlarge,db.t4g.2xlarge 174 | AmazonRDS,MariaDB,db.m3.medium,None 175 | AmazonRDS,MariaDB,db.m3.large,db.m6g.large 176 | AmazonRDS,MariaDB,db.m3.xlarge,db.m6g.xlarge 177 | AmazonRDS,MariaDB,db.m3.2xlarge,db.m6g.2xlarge 178 | AmazonRDS,MariaDB,db.m3.4xlarge,db.m6g.4xlarge 179 | AmazonRDS,MariaDB,db.m3.10xlarge,db.m6g.10xlarge 180 | AmazonRDS,MariaDB,db.m3.16xlarge,db.m6g.16xlarge 181 | AmazonRDS,MariaDB,db.m4.large,db.m6g.large 182 | AmazonRDS,MariaDB,db.m4.xlarge,db.m6g.xlarge 183 | AmazonRDS,MariaDB,db.m4.2xlarge,db.m6g.2xlarge 184 | AmazonRDS,MariaDB,db.m4.4xlarge,db.m6g.4xlarge 185 | AmazonRDS,MariaDB,db.m4.10xlarge,db.m6g.10xlarge 186 | AmazonRDS,MariaDB,db.m4.16xlarge,db.m6g.16xlarge 187 | AmazonRDS,MariaDB,db.m5.large,db.m6g.large 188 | AmazonRDS,MariaDB,db.m5.xlarge,db.m6g.xlarge 189 | AmazonRDS,MariaDB,db.m5.2xlarge,db.m6g.2xlarge 190 | AmazonRDS,MariaDB,db.m5.4xlarge,db.m6g.4xlarge 191 | AmazonRDS,MariaDB,db.m5.8xlarge,db.m6g.8xlarge 192 | AmazonRDS,MariaDB,db.m5.12xlarge,db.m6g.12xlarge 193 
| AmazonRDS,MariaDB,db.m5.16xlarge,db.m6g.16xlarge 194 | AmazonRDS,MariaDB,db.m5.24xlarge,None 195 | AmazonRDS,MariaDB,db.r3.large,db.r6g.large 196 | AmazonRDS,MariaDB,db.r3.xlarge,db.r6g.xlarge 197 | AmazonRDS,MariaDB,db.r3.2xlarge,db.r6g.2xlarge 198 | AmazonRDS,MariaDB,db.r3.4xlarge,db.r6g.4xlarge 199 | AmazonRDS,MariaDB,db.r3.8xlarge,db.r6g.8xlarge 200 | AmazonRDS,MariaDB,db.r4.large,db.r6g.large 201 | AmazonRDS,MariaDB,db.r4.xlarge,db.r6g.xlarge 202 | AmazonRDS,MariaDB,db.r4.2xlarge,db.r6g.2xlarge 203 | AmazonRDS,MariaDB,db.r4.4xlarge,db.r6g.4xlarge 204 | AmazonRDS,MariaDB,db.r4.8xlarge,db.r6g.8xlarge 205 | AmazonRDS,MariaDB,db.r4.16xlarge,db.r6g.16xlarge 206 | AmazonRDS,MariaDB,db.r5.large,db.r6g.large 207 | AmazonRDS,MariaDB,db.r5.xlarge,db.r6g.xlarge 208 | AmazonRDS,MariaDB,db.r5.2xlarge,db.r6g.2xlarge 209 | AmazonRDS,MariaDB,db.r5.4xlarge,db.r6g.4xlarge 210 | AmazonRDS,MariaDB,db.r5.8xlarge,db.r6g.8xlarge 211 | AmazonRDS,MariaDB,db.r5.12xlarge,db.r6g.12xlarge 212 | AmazonRDS,MariaDB,db.r5.16xlarge,db.r6g.16xlarge 213 | AmazonRDS,MariaDB,db.r5.24xlarge,None -------------------------------------------------------------------------------- /data-collection/deploy/deploy-data-read-permissions.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/awslabs/cid-data-collection-framework/blob/main/data-collection/v3.11.0/deploy/deploy-data-read-permissions.yaml 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: CID Data Collection - All-in-One for Management Account v3.11.0 - AWS Solution SO9011 4 | Metadata: 5 | AWS::CloudFormation::Interface: 6 | ParameterGroups: 7 | - Label: 8 | default: "Deployment parameters" 9 | Parameters: 10 | - DataCollectionAccountID 11 | - ResourcePrefix 12 | - ManagementAccountRole 13 | - MultiAccountRoleName 14 | - OrganizationalUnitIds 15 | - AllowModuleReadInMgmt 16 | - CFNSourceBucket 17 | - Label: 18 | default: "Available modules" 19 | Parameters: 20 | - IncludeBackupModule 21 | - IncludeBudgetsModule 22 | - IncludeComputeOptimizerModule 23 | - IncludeCostAnomalyModule 24 | - IncludeSupportCasesModule 25 | - IncludeECSChargebackModule 26 | - IncludeHealthEventsModule 27 | - IncludeInventoryCollectorModule 28 | - IncludeRDSUtilizationModule 29 | - IncludeEUCUtilizationModule 30 | - IncludeRightsizingModule 31 | - IncludeTAModule 32 | - IncludeTransitGatewayModule 33 | - IncludeLicenseManagerModule 34 | - IncludeServiceQuotasModule 35 | ParameterLabels: 36 | ManagementAccountRole: 37 | default: "Management account role" 38 | DataCollectionAccountID: 39 | default: "Data Collection Account Id" 40 | MultiAccountRoleName: 41 | default: "Multi Account Role Name" 42 | OrganizationalUnitIds: 43 | default: "Comma Delimited list of Organizational Unit IDs. StackSets will deploy a read role in all AWS Accounts within those OUs. See your OU ID here: https://console.aws.amazon.com/organizations/v2/home/accounts (we recommend choosing OU ID of your Root)" 44 | AllowModuleReadInMgmt: 45 | default: "Allow creation of read roles for modules in management account" 46 | ResourcePrefix: 47 | default: "Role Prefix" 48 | CFNSourceBucket: 49 | default: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. 
Must be always 'aws-managed-cost-intelligence-dashboards'" 50 | IncludeBudgetsModule: 51 | default: "Include AWS Budgets Collection Module" 52 | IncludeComputeOptimizerModule: 53 | default: "Include AWS Compute Optimizer Data Collection Module" 54 | IncludeCostAnomalyModule: 55 | default: "Include Cost Anomalies Data Collection Module" 56 | IncludeSupportCasesModule: 57 | default: "Include Support Cases Data Collection Module" 58 | IncludeECSChargebackModule: 59 | default: "Include ECS Chargeback Data Collection Module" 60 | IncludeInventoryCollectorModule: 61 | default: "Include Inventory Collector Module" 62 | IncludeRDSUtilizationModule: 63 | default: "Include RDS Utilization Data Collection Module" 64 | IncludeEUCUtilizationModule: 65 | default: "Include WorkSpaces Utilization Data Collection Module" 66 | IncludeRightsizingModule: 67 | default: "Include Rightsizing Recommendations Data Collection Module" 68 | IncludeTAModule: 69 | default: "Include AWS Trusted Advisor Data Collection Module" 70 | IncludeTransitGatewayModule: 71 | default: "Include AWS TransitGateway Collection Module" 72 | IncludeBackupModule: 73 | default: "Include AWS Backup Collection Module" 74 | IncludeHealthEventsModule: 75 | default: "Include AWS Health Events Module" 76 | IncludeLicenseManagerModule: 77 | default: "Include Marketplace Licensing Module" 78 | IncludeServiceQuotasModule: 79 | default: "Include Service Quotas Module" 80 | Parameters: 81 | ManagementAccountRole: 82 | Type: String 83 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 84 | Default: Lambda-Assume-Role-Management-Account 85 | DataCollectionAccountID: 86 | Type: String 87 | Description: AccountId of where the collector is deployed 88 | MultiAccountRoleName: 89 | Type: String 90 | Description: The name of the IAM role that will be deployed from the management account to linked accounts as a read only role. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 91 | Default: "Optimization-Data-Multi-Account-Role" 92 | OrganizationalUnitIds: 93 | Type: String 94 | AllowedPattern: ^(ou-[a-z0-9]{4,32}-[a-z0-9]{8,32}|r-[a-z0-9]{4,32})(,(ou-[a-z0-9]{4,32}-[a-z0-9]{8,32}|r-[a-z0-9]{4,32}))*$ 95 | Description: "(Ex: r-ab01,ou-ab01-abcd1234) List of Organizational Unit IDs you wish to collect data for. It can be a single organizational unit. The organization root ID is usually preferred to collect data from all the member accounts." 96 | AllowModuleReadInMgmt: 97 | Type: String 98 | Description: Allows the creation of the read data roles for modules in the management account 99 | AllowedValues: 100 | - "yes" 101 | - "no" 102 | ResourcePrefix: 103 | Type: String 104 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable e.g. prefix- 105 | Default: "CID-DC-" 106 | CFNSourceBucket: 107 | Type: String 108 | Description: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. 
Must be always 'aws-managed-cost-intelligence-dashboards'" 109 | Default: "aws-managed-cost-intelligence-dashboards" 110 | IncludeBudgetsModule: 111 | Type: String 112 | Description: Collects budgets from your accounts 113 | AllowedValues: ['yes', 'no'] 114 | Default: 'no' 115 | IncludeComputeOptimizerModule: 116 | Type: String 117 | Description: Collects AWS Compute Optimizer service recommendations 118 | AllowedValues: ['yes', 'no'] 119 | Default: 'no' 120 | IncludeCostAnomalyModule: 121 | Type: String 122 | Description: "Collects AWS Cost Explorer Cost Anomalies Recommendations" 123 | AllowedValues: ['yes', 'no'] 124 | Default: 'no' 125 | IncludeSupportCasesModule: 126 | Type: String 127 | Description: "Collects AWS Support Cases data" 128 | AllowedValues: ['yes', 'no'] 129 | Default: 'no' 130 | IncludeECSChargebackModule: 131 | Type: String 132 | Description: Collects data which shows costs associated with ECS Tasks leveraging EC2 instances within a Cluster 133 | AllowedValues: ['yes', 'no'] 134 | Default: 'no' 135 | IncludeInventoryCollectorModule: 136 | Type: String 137 | Description: Collects data about AMIs, EBS volumes and snapshots 138 | AllowedValues: ['yes', 'no'] 139 | Default: 'no' 140 | IncludeRDSUtilizationModule: 141 | Type: String 142 | Description: Collects RDS CloudWatch metrics from your accounts 143 | AllowedValues: ['yes', 'no'] 144 | Default: 'no' 145 | IncludeEUCUtilizationModule: 146 | Type: String 147 | Description: Collects WorkSpaces CloudWatch metrics from your accounts 148 | AllowedValues: ['yes', 'no'] 149 | Default: 'no' 150 | IncludeRightsizingModule: 151 | Type: String 152 | Description: "Collects AWS Cost Explorer Rightsizing Recommendations" 153 | AllowedValues: ['yes', 'no'] 154 | Default: 'no' 155 | IncludeTAModule: 156 | Type: String 157 | Description: Collects AWS Trusted Advisor recommendations data 158 | AllowedValues: ['yes', 'no'] 159 | Default: 'no' 160 | IncludeTransitGatewayModule: 161 | Type: String 162 | Description: Collects TransitGateway from your accounts 163 | AllowedValues: ['yes', 'no'] 164 | Default: 'no' 165 | IncludeBackupModule: 166 | Type: String 167 | Description: Collects AWS Backup events from your accounts 168 | AllowedValues: ['yes', 'no'] 169 | Default: 'no' 170 | IncludeHealthEventsModule: 171 | Type: String 172 | Description: Collects AWS Health Events from your accounts 173 | AllowedValues: ['yes', 'no'] 174 | Default: 'no' 175 | IncludeLicenseManagerModule: 176 | Type: String 177 | Description: Collects Marketplace Licensing information 178 | AllowedValues: ['yes', 'no'] 179 | Default: 'no' 180 | IncludeServiceQuotasModule: 181 | Type: String 182 | Description: Collects Service Quotas information 183 | AllowedValues: ['yes', 'no'] 184 | Default: 'no' 185 | 186 | Conditions: 187 | DeployModuleReadInMgmt: !Equals [!Ref AllowModuleReadInMgmt, "yes"] 188 | 189 | Resources: 190 | DataCollectorMgmtAccountReadStack: 191 | Type: AWS::CloudFormation::Stack 192 | Properties: 193 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-management-account.yaml" 194 | Parameters: 195 | DataCollectionAccountID: !Ref DataCollectionAccountID 196 | ManagementAccountRole: !Ref ManagementAccountRole 197 | ResourcePrefix: !Ref ResourcePrefix 198 | IncludeComputeOptimizerModule: !Ref IncludeComputeOptimizerModule 199 | IncludeCostAnomalyModule: !Ref IncludeCostAnomalyModule 200 | IncludeRightsizingModule: !Ref IncludeRightsizingModule 201 | IncludeBackupModule: !Ref IncludeBackupModule 202 
| IncludeHealthEventsModule: !Ref IncludeHealthEventsModule 203 | IncludeLicenseManagerModule: !Ref IncludeLicenseManagerModule 204 | IncludeServiceQuotasModule: !Ref IncludeServiceQuotasModule 205 | DataCollectorMgmtAccountModulesReadStack: 206 | Type: AWS::CloudFormation::Stack 207 | Condition: DeployModuleReadInMgmt 208 | Properties: 209 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml" 210 | Parameters: 211 | DataCollectionAccountID: !Ref DataCollectionAccountID 212 | MultiAccountRoleName: !Ref MultiAccountRoleName 213 | ResourcePrefix: !Ref ResourcePrefix 214 | IncludeTAModule: !Ref IncludeTAModule 215 | IncludeSupportCasesModule: !Ref IncludeSupportCasesModule 216 | IncludeInventoryCollectorModule: !Ref IncludeInventoryCollectorModule 217 | IncludeECSChargebackModule: !Ref IncludeECSChargebackModule 218 | IncludeRDSUtilizationModule: !Ref IncludeRDSUtilizationModule 219 | IncludeEUCUtilizationModule: !Ref IncludeEUCUtilizationModule 220 | IncludeBudgetsModule: !Ref IncludeBudgetsModule 221 | IncludeTransitGatewayModule: !Ref IncludeTransitGatewayModule 222 | IncludeServiceQuotasModule: !Ref IncludeServiceQuotasModule 223 | DataCollectorOrgAccountModulesReadStackSet: 224 | Type: AWS::CloudFormation::StackSet 225 | Properties: 226 | Description: "StackSet in charge of deploying read roles across organization accounts v3.11.0" 227 | PermissionModel: SERVICE_MANAGED 228 | AutoDeployment: 229 | Enabled: true 230 | RetainStacksOnAccountRemoval: false 231 | ManagedExecution: 232 | Active: true 233 | OperationPreferences: 234 | MaxConcurrentPercentage: 100 235 | FailureTolerancePercentage: 100 236 | RegionConcurrencyType: PARALLEL 237 | Parameters: 238 | - ParameterKey: DataCollectionAccountID 239 | ParameterValue: !Ref DataCollectionAccountID 240 | - ParameterKey: MultiAccountRoleName 241 | ParameterValue: !Ref MultiAccountRoleName 242 | - ParameterKey: ResourcePrefix 243 | ParameterValue: !Ref ResourcePrefix 244 | - ParameterKey: IncludeTAModule 245 | ParameterValue: !Ref IncludeTAModule 246 | - ParameterKey: IncludeSupportCasesModule 247 | ParameterValue: !Ref IncludeSupportCasesModule 248 | - ParameterKey: IncludeInventoryCollectorModule 249 | ParameterValue: !Ref IncludeInventoryCollectorModule 250 | - ParameterKey: IncludeECSChargebackModule 251 | ParameterValue: !Ref IncludeECSChargebackModule 252 | - ParameterKey: IncludeRDSUtilizationModule 253 | ParameterValue: !Ref IncludeRDSUtilizationModule 254 | - ParameterKey: IncludeEUCUtilizationModule 255 | ParameterValue: !Ref IncludeEUCUtilizationModule 256 | - ParameterKey: IncludeBudgetsModule 257 | ParameterValue: !Ref IncludeBudgetsModule 258 | - ParameterKey: IncludeTransitGatewayModule 259 | ParameterValue: !Ref IncludeTransitGatewayModule 260 | - ParameterKey: IncludeServiceQuotasModule 261 | ParameterValue: !Ref IncludeServiceQuotasModule 262 | StackInstancesGroup: 263 | - DeploymentTargets: 264 | OrganizationalUnitIds: !Split [",", !Ref OrganizationalUnitIds] 265 | Regions: 266 | - !Ref "AWS::Region" 267 | Capabilities: 268 | - CAPABILITY_IAM 269 | - CAPABILITY_NAMED_IAM 270 | StackSetName: !Sub "StackSet-${AWS::AccountId}-OptimizationDataRole" 271 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml" 272 | -------------------------------------------------------------------------------- /data-collection/deploy/module-budgets.yaml: 
-------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Retrieves Budgets data 3 | Parameters: 4 | DatabaseName: 5 | Type: String 6 | Description: Name of the Athena database to be created to hold lambda information 7 | Default: optimization_data 8 | DestinationBucket: 9 | Type: String 10 | Description: Name of the S3 Bucket to be created to hold data information 11 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 12 | DestinationBucketARN: 13 | Type: String 14 | Description: ARN of the S3 Bucket that exists or needs to be created to hold rightsizing information 15 | MultiAccountRoleName: 16 | Type: String 17 | Description: Name of the IAM role deployed in all accounts which can retrieve AWS Data. 18 | CFDataName: 19 | Type: String 20 | Description: The name of what this cf is doing. 21 | Default: budgets 22 | GlueRoleARN: 23 | Type: String 24 | Description: Arn for the Glue Crawler role 25 | Schedule: 26 | Type: String 27 | Description: EventBridge Schedule to trigger the data collection 28 | Default: "rate(1 day)" 29 | ResourcePrefix: 30 | Type: String 31 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 32 | LambdaAnalyticsARN: 33 | Type: String 34 | Description: Arn of lambda for Analytics 35 | AccountCollectorLambdaARN: 36 | Type: String 37 | Description: Arn of the Account Collector Lambda 38 | CodeBucket: 39 | Type: String 40 | Description: Source code bucket 41 | StepFunctionTemplate: 42 | Type: String 43 | Description: S3 key to the JSON template for the StepFunction 44 | StepFunctionExecutionRoleARN: 45 | Type: String 46 | Description: Common role for Step Function execution 47 | SchedulerExecutionRoleARN: 48 | Type: String 49 | Description: Common role for module Scheduler execution 50 | DataBucketsKmsKeysArns: 51 | Type: String 52 | Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." 
53 | Default: "" 54 | 55 | Outputs: 56 | StepFunctionARN: 57 | Description: ARN for the module's Step Function 58 | Value: !GetAtt ModuleStepFunction.Arn 59 | Conditions: 60 | NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] 61 | Resources: 62 | LambdaRole: 63 | Type: AWS::IAM::Role 64 | Properties: 65 | RoleName: !Sub "${ResourcePrefix}${CFDataName}-LambdaRole" 66 | AssumeRolePolicyDocument: 67 | Statement: 68 | - Action: 69 | - sts:AssumeRole 70 | Effect: Allow 71 | Principal: 72 | Service: 73 | - !Sub "lambda.${AWS::URLSuffix}" 74 | Version: 2012-10-17 75 | ManagedPolicyArns: 76 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 77 | Path: / 78 | Policies: 79 | - PolicyName: !Sub "${CFDataName}-MultiAccount-LambdaRole" 80 | PolicyDocument: 81 | Version: "2012-10-17" 82 | Statement: 83 | - Effect: "Allow" 84 | Action: "sts:AssumeRole" 85 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${MultiAccountRoleName}" 86 | - PolicyName: "S3Access" 87 | PolicyDocument: 88 | Version: "2012-10-17" 89 | Statement: 90 | - Effect: "Allow" 91 | Action: 92 | - "s3:PutObject" 93 | Resource: 94 | - !Sub "${DestinationBucketARN}/*" 95 | - !If 96 | - NeedDataBucketsKms 97 | - PolicyName: "KMS" 98 | PolicyDocument: 99 | Version: "2012-10-17" 100 | Statement: 101 | - Effect: "Allow" 102 | Action: 103 | - "kms:GenerateDataKey" 104 | Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ] 105 | - !Ref AWS::NoValue 106 | Metadata: 107 | cfn_nag: 108 | rules_to_suppress: 109 | - id: W28 # Resource found with an explicit name, this disallows updates that require replacement of this resource 110 | reason: "Need explicit name to identify role actions" 111 | 112 | LambdaFunction: 113 | Type: AWS::Lambda::Function 114 | Properties: 115 | FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda' 116 | Description: !Sub "Lambda function to retrieve ${CFDataName}" 117 | Runtime: python3.12 118 | Architectures: [x86_64] 119 | Code: 120 | ZipFile: | 121 | #Authors: 122 | # Stephanie Gooch - initial version 123 | # Mohideen - Added Budgets tag collection module 124 | import os 125 | import json 126 | import re 127 | import logging 128 | import datetime 129 | from json import JSONEncoder 130 | 131 | import boto3 132 | 133 | BUCKET = os.environ["BUCKET_NAME"] 134 | PREFIX = os.environ["PREFIX"] 135 | ROLE_NAME = os.environ['ROLE_NAME'] 136 | TMP_FILE = "/tmp/data.json" 137 | 138 | logger = logging.getLogger(__name__) 139 | logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO)) 140 | 141 | class DateTimeEncoder(JSONEncoder): 142 | """encoder for json with time object""" 143 | def default(self, o): 144 | if isinstance(o, (datetime.date, datetime.datetime)): 145 | return o.isoformat() 146 | return None 147 | 148 | def clean_value(value): 149 | """Clean string values by replacing special characters with underscores""" 150 | if isinstance(value, str): 151 | return re.sub(r'\W+', '_', value) 152 | elif isinstance(value, list): 153 | return [clean_value(v) for v in value] 154 | return value 155 | 156 | def process_cost_filters(budget): 157 | """Process and clean cost filters in budget""" 158 | cost_filters = budget.get('CostFilters', {}) 159 | if not cost_filters: 160 | budget['CostFilters'] = {'Filter': ['None']} 161 | return 162 | 163 | cleaned_filters = { 164 | re.sub(r'\W+', '_', key): clean_value(value) 165 | for key, value in cost_filters.items() 166 | } 167 | budget['CostFilters'] = cleaned_filters 168 | 169 | def 
assume_role(account_id, service, region): 170 | partition = boto3.session.Session().get_partition_for_region(region_name=region) 171 | cred = boto3.client('sts', region_name=region).assume_role( 172 | RoleArn=f"arn:{partition}:iam::{account_id}:role/{ROLE_NAME}", 173 | RoleSessionName="data_collection" 174 | )['Credentials'] 175 | return boto3.client( 176 | service, 177 | aws_access_key_id=cred['AccessKeyId'], 178 | aws_secret_access_key=cred['SecretAccessKey'], 179 | aws_session_token=cred['SessionToken'] 180 | ) 181 | 182 | def lambda_handler(event, context): #pylint: disable=W0613 183 | logger.info(f"Event data {json.dumps(event)}") 184 | if 'account' not in event: 185 | raise ValueError( 186 | "Please do not trigger this Lambda manually." 187 | "Find the corresponding state machine in Step Functions and Trigger from there." 188 | ) 189 | collection_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 190 | aws_partition = boto3.session.Session().get_partition_for_region(boto3.session.Session().region_name) 191 | account = json.loads(event["account"]) 192 | account_id = account["account_id"] 193 | account_name = account["account_name"] 194 | payer_id = account["payer_id"] 195 | 196 | logger.info(f"Collecting data for account: {account_id}") 197 | try: 198 | budgets_client = assume_role(account_id, "budgets", "us-east-1") # must be us-east-1 199 | count = 0 200 | with open(TMP_FILE, "w", encoding='utf-8') as f: 201 | for budget in budgets_client.get_paginator("describe_budgets").paginate(AccountId=account_id).search('Budgets'): 202 | if not budget: 203 | continue 204 | budget['collection_time'] = collection_time 205 | # Fetch tags for the budget using List tag for resource API 206 | budget_name = budget['BudgetName'] 207 | budget_tags = budgets_client.list_tags_for_resource(ResourceARN=f"arn:{aws_partition}:budgets::{account_id}:budget/{budget_name}") 208 | budget.update({ 209 | 'Account_ID': account_id, 210 | 'Account_Name': account_name, 211 | 'Tags': budget_tags.get('ResourceTags') or [] 212 | }) 213 | # Fetch CostFilters if available 214 | process_cost_filters(budget) 215 | # Add column plannedbudgetslimit as type array 216 | budget_limits = budget.pop('PlannedBudgetLimits', {}) 217 | budget['PlannedBudgetLimits_Flat'] = [ 218 | {'date': key, 'Amount': value.get('Amount'), 'Unit': value.get('Unit')} 219 | for key, value in budget_limits.items() 220 | ] 221 | f.write(json.dumps(budget, cls=DateTimeEncoder) + "\n") 222 | count += 1 223 | logger.info(f"Budgets collected: {count}") 224 | s3_upload(account_id, payer_id) 225 | except Exception as exc: #pylint: disable=broad-exception-caught 226 | if "AccessDenied" in str(exc): 227 | print(f'Failed to assume role {ROLE_NAME} in account {account_id}. Please make sure the role exists. {exc}') 228 | else: 229 | print(f'{exc}. 
Gracefully exiting from Lambda so we do not break all StepFunction Execution') 230 | return 231 | 232 | def s3_upload(account_id, payer_id): 233 | if os.path.getsize(TMP_FILE) == 0: 234 | logger.info(f"No data in file for {PREFIX}") 235 | return 236 | key = datetime.datetime.now().strftime(f"{PREFIX}/{PREFIX}-data/payer_id={payer_id}/year=%Y/month=%m/budgets-{account_id}.json") 237 | boto3.client('s3').upload_file(TMP_FILE, BUCKET, key) 238 | logger.info(f"Budget data for {account_id} stored at s3://{BUCKET}/{key}") 239 | 240 | Handler: 'index.lambda_handler' 241 | MemorySize: 2688 242 | Timeout: 300 243 | Role: !GetAtt LambdaRole.Arn 244 | Environment: 245 | Variables: 246 | BUCKET_NAME: !Ref DestinationBucket 247 | PREFIX: !Ref CFDataName 248 | ROLE_NAME: !Ref MultiAccountRoleName 249 | 250 | Metadata: 251 | cfn_nag: 252 | rules_to_suppress: 253 | - id: W89 # Lambda functions should be deployed inside a VPC 254 | reason: "No need for VPC in this case" 255 | - id: W92 # Lambda functions should define ReservedConcurrentExecutions to reserve simultaneous executions 256 | reason: "No need for simultaneous execution" 257 | 258 | LogGroup: 259 | Type: AWS::Logs::LogGroup 260 | Properties: 261 | LogGroupName: !Sub "/aws/lambda/${LambdaFunction}" 262 | RetentionInDays: 60 263 | 264 | Crawler: 265 | Type: AWS::Glue::Crawler 266 | Properties: 267 | Name: !Sub '${ResourcePrefix}${CFDataName}-Crawler' 268 | Role: !Ref GlueRoleARN 269 | DatabaseName: !Ref DatabaseName 270 | Targets: 271 | S3Targets: 272 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" 273 | 274 | ModuleStepFunction: 275 | Type: AWS::StepFunctions::StateMachine 276 | Properties: 277 | StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine' 278 | StateMachineType: STANDARD 279 | RoleArn: !Ref StepFunctionExecutionRoleARN 280 | DefinitionS3Location: 281 | Bucket: !Ref CodeBucket 282 | Key: !Ref StepFunctionTemplate 283 | DefinitionSubstitutions: 284 | AccountCollectorLambdaARN: !Ref AccountCollectorLambdaARN 285 | ModuleLambdaARN: !GetAtt LambdaFunction.Arn 286 | Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-Crawler"]' 287 | CollectionType: 'LINKED' 288 | Params: '' 289 | Module: !Ref CFDataName 290 | DeployRegion: !Ref AWS::Region 291 | Account: !Ref AWS::AccountId 292 | Prefix: !Ref ResourcePrefix 293 | Bucket: !Ref DestinationBucket 294 | 295 | ModuleRefreshSchedule: 296 | Type: 'AWS::Scheduler::Schedule' 297 | Properties: 298 | Description: !Sub 'Scheduler for the ODC ${CFDataName} module' 299 | Name: !Sub '${ResourcePrefix}${CFDataName}-RefreshSchedule' 300 | ScheduleExpression: !Ref Schedule 301 | State: ENABLED 302 | FlexibleTimeWindow: 303 | MaximumWindowInMinutes: 30 304 | Mode: 'FLEXIBLE' 305 | Target: 306 | Arn: !GetAtt ModuleStepFunction.Arn 307 | RoleArn: !Ref SchedulerExecutionRoleARN 308 | 309 | AnalyticsExecutor: 310 | Type: Custom::LambdaAnalyticsExecutor 311 | Properties: 312 | ServiceToken: !Ref LambdaAnalyticsARN 313 | Name: !Ref CFDataName 314 | 315 | AthenaQuery: 316 | Type: AWS::Athena::NamedQuery 317 | Properties: 318 | Database: !Ref DatabaseName 319 | Description: !Sub "Provides a summary view of the ${CFDataName}" 320 | Name: !Sub "${CFDataName}_view" 321 | QueryString: !Sub | 322 | CREATE OR REPLACE VIEW budgets_view AS 323 | SELECT 324 | budgetname budget_name 325 | , CAST(budgetlimit.amount AS decimal) budget_amount 326 | , CAST(calculatedspend.actualspend.amount AS decimal) actualspend 327 | , CAST(calculatedspend.forecastedspend.amount AS decimal) 
forecastedspend 328 | , timeunit 329 | , budgettype budget_type 330 | , account_id 331 | , timeperiod.start start_date 332 | , timeperiod."end" end_date 333 | , year budget_year 334 | , month budget_month 335 | FROM 336 | ${DatabaseName}.budgets_data 337 | WHERE (budgettype = 'COST') AND costfilters.filter[1] = 'None' 338 | -------------------------------------------------------------------------------- /data-collection/deploy/module-license-manager.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Retrieves AWS Marketplace License and grant information from AWS License manager from across an organization 3 | Parameters: 4 | DatabaseName: 5 | Type: String 6 | Description: Name of the Athena database to be created to hold lambda information 7 | Default: optimization_data 8 | DestinationBucket: 9 | Type: String 10 | Description: Name of the S3 Bucket that exists or needs to be created to hold backup information 11 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 12 | DestinationBucketARN: 13 | Type: String 14 | Description: ARN of the S3 Bucket that exists or needs to be created to hold backup information 15 | ManagementRoleName: 16 | Type: String 17 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 18 | CFDataName: 19 | Type: String 20 | Description: The name of what this cf is doing. 21 | Default: license-manager 22 | GrantDataPrefix: 23 | Type: String 24 | Description: Prefix for Grant data. 25 | Default: grants 26 | LicenseDataPrefix: 27 | Type: String 28 | Description: Prefix for License data. 29 | Default: licenses 30 | GlueRoleARN: 31 | Type: String 32 | Description: Arn for the Glue Crawler role 33 | Schedule: 34 | Type: String 35 | Description: EventBridge Schedule to trigger the data collection 36 | Default: "rate(14 days)" 37 | ResourcePrefix: 38 | Type: String 39 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 40 | LambdaAnalyticsARN: 41 | Type: String 42 | Description: Arn of lambda for Analytics 43 | AccountCollectorLambdaARN: 44 | Type: String 45 | Description: Arn of the Account Collector Lambda 46 | CodeBucket: 47 | Type: String 48 | Description: Source code bucket 49 | StepFunctionTemplate: 50 | Type: String 51 | Description: S3 key to the JSON template for the StepFunction 52 | StepFunctionExecutionRoleARN: 53 | Type: String 54 | Description: Common role for Step Function execution 55 | SchedulerExecutionRoleARN: 56 | Type: String 57 | Description: Common role for module Scheduler execution 58 | DataBucketsKmsKeysArns: 59 | Type: String 60 | Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." 
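        # Example value (hypothetical ARNs, comma separated, no spaces): "arn:aws:kms:us-east-1:111122223333:key/00000000-0000-0000-0000-000000000000,arn:aws:kms:eu-west-1:111122223333:key/11111111-1111-1111-1111-111111111111"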
61 | Default: "" 62 | 63 | Conditions: 64 | NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] 65 | 66 | Outputs: 67 | StepFunctionARN: 68 | Description: ARN for the module's Step Function 69 | Value: !GetAtt ModuleStepFunction.Arn 70 | 71 | Resources: 72 | LambdaRole: 73 | Type: AWS::IAM::Role 74 | Properties: 75 | RoleName: !Sub "${ResourcePrefix}${CFDataName}-LambdaRole" 76 | AssumeRolePolicyDocument: 77 | Statement: 78 | - Action: 79 | - sts:AssumeRole 80 | Effect: Allow 81 | Principal: 82 | Service: 83 | - !Sub "lambda.${AWS::URLSuffix}" 84 | Version: 2012-10-17 85 | ManagedPolicyArns: 86 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 87 | Path: / 88 | Policies: 89 | - PolicyName: !Sub "${CFDataName}-ManagementAccount-LambdaRole" 90 | PolicyDocument: 91 | Version: "2012-10-17" 92 | Statement: 93 | - Effect: "Allow" 94 | Action: "sts:AssumeRole" 95 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${ManagementRoleName}" # Need to assume a Read role in management accounts 96 | - !If 97 | - NeedDataBucketsKms 98 | - PolicyName: "KMS" 99 | PolicyDocument: 100 | Version: "2012-10-17" 101 | Statement: 102 | - Effect: "Allow" 103 | Action: 104 | - "kms:GenerateDataKey" 105 | Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ] 106 | - !Ref AWS::NoValue 107 | - PolicyName: "S3-Access" 108 | PolicyDocument: 109 | Version: "2012-10-17" 110 | Statement: 111 | - Effect: "Allow" 112 | Action: 113 | - "s3:PutObject" 114 | Resource: 115 | - !Sub "${DestinationBucketARN}/*" 116 | Metadata: 117 | cfn_nag: 118 | rules_to_suppress: 119 | - id: W28 # Resource found with an explicit name, this disallows updates that require replacement of this resource 120 | reason: "Need explicit name to identify role actions" 121 | 122 | LambdaFunction: 123 | Type: AWS::Lambda::Function 124 | Properties: 125 | FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda' 126 | Description: !Sub "Lambda function to retrieve ${CFDataName}" 127 | Runtime: python3.12 128 | Architectures: [x86_64] 129 | Code: 130 | ZipFile: | 131 | """ Collects AWS Marketplace Licensing and grant information, 132 | and uploads to S3. Creates Step functions, Glue crawler to crawl S3 bucket, 133 | creates Athena tables and view. 
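          Environment variables (set by this template's Lambda Environment block): BUCKET_NAME, ROLE_NAME, PREFIX, S3_GRANTS_PREFIX, S3_LICENSES_PREFIX; LOG_LEVEL is optional and defaults to INFO.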
134 | """ 135 | import os 136 | import json 137 | import logging 138 | from datetime import date 139 | import boto3 140 | # Initialize AWS clients 141 | 142 | s3 = boto3.client('s3') 143 | athena = boto3.client('athena') 144 | 145 | logger = logging.getLogger(__name__) 146 | logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO)) 147 | BUCKET = os.environ['BUCKET_NAME'] 148 | ROLE = os.environ['ROLE_NAME'] 149 | S3_GRANTS_PREFIX = os.environ['S3_GRANTS_PREFIX'] 150 | S3_LICENSES_PREFIX = os.environ['S3_LICENSES_PREFIX'] 151 | PREFIX = os.environ['PREFIX'] 152 | 153 | def store_data_to_s3(data, prefix, payer_id): 154 | if not data: 155 | logger.info("No data") 156 | return 157 | json_data = "\n".join(json.dumps(entity) for entity in data) 158 | key = date.today().strftime(f"{PREFIX}/{PREFIX}-{prefix}/payer_id={payer_id}/year=%Y/month=%m/day=%d/%Y-%m-%d.json") 159 | s3.put_object( 160 | Bucket=BUCKET, 161 | Key=key, 162 | Body=json_data, 163 | ContentType='application/json' 164 | ) 165 | logger.info(f'File upload successful to s3://{BUCKET}/{key}') 166 | 167 | def get_received_licenses(license_manager): 168 | licenses = [] 169 | pagination_token = '' #nosec 170 | while True: #Operation list_received_licenses cannot be paginated 171 | response = license_manager.list_received_licenses( 172 | MaxResults=100, 173 | NextToken=pagination_token 174 | ) 175 | licenses.extend(response.get('Licenses', [])) 176 | pagination_token = response.get('NextToken', '') 177 | if not pagination_token: 178 | break 179 | return licenses 180 | 181 | def process_one_management_acc(management_account_id): 182 | region = boto3.session.Session().region_name 183 | partition = boto3.session.Session().get_partition_for_region(region_name=region) 184 | logger.debug('assuming role') 185 | cred = boto3.client('sts').assume_role( 186 | RoleArn=f"arn:{partition}:iam::{management_account_id}:role/{ROLE}", 187 | RoleSessionName="data_collection" 188 | )['Credentials'] 189 | license_manager = boto3.client( 190 | 'license-manager', 191 | "us-east-1", # Must be "us-east-1" 192 | aws_access_key_id=cred['AccessKeyId'], 193 | aws_secret_access_key=cred['SecretAccessKey'], 194 | aws_session_token=cred['SessionToken'], 195 | ) 196 | process_license_information(license_manager, management_account_id) 197 | 198 | def process_license_information(license_manager, management_account_id): 199 | logger.info("Retrieving licensing information") 200 | license_grants = [] 201 | try: 202 | marketplace_licenses = [ 203 | license_ for license_ in get_received_licenses(license_manager) 204 | if license_.get('Issuer', {}).get('Name') == 'AWS/Marketplace' 205 | ] 206 | 207 | for license_ in marketplace_licenses: 208 | license_arn = license_['LicenseArn'] 209 | try: 210 | grants_for_license = license_manager.list_received_grants_for_organization(LicenseArn=license_arn)['Grants'] 211 | except license_manager.exceptions.AccessDeniedException: 212 | print( 213 | 'ERROR: AccessDenied when getting grants for ', license_arn, 214 | 'Open https://us-east-1.console.aws.amazon.com/marketplace/home#/settings and make sure ' 215 | 'the organization trust in Marketplace settings is enabled. 
' 216 | ) 217 | else: 218 | license_grants.extend(grants_for_license) 219 | 220 | # Store the licenses data to S3 221 | store_data_to_s3(marketplace_licenses, S3_LICENSES_PREFIX, management_account_id) 222 | 223 | # Store the grants data to S3 224 | store_data_to_s3(license_grants, S3_GRANTS_PREFIX, management_account_id) 225 | 226 | except Exception as exc: #pylint: disable=W0718 227 | logging.error(f"{management_account_id} : {exc}") 228 | return "Successful" 229 | 230 | def lambda_handler(event, context): #pylint: disable=W0613 231 | logger.info(f"Event data {json.dumps(event)}") 232 | if 'account' not in event: 233 | raise ValueError( 234 | "Please do not trigger this Lambda manually." 235 | "Find the corresponding state machine in Step Functions and Trigger from there." 236 | ) 237 | account = json.loads(event["account"]) 238 | try: 239 | process_one_management_acc(account["account_id"]) 240 | except Exception as exc: #pylint: disable=W0718 241 | logging.error(f"{account['account_id']} : {exc}") 242 | 243 | return "Successful" 244 | 245 | Handler: 'index.lambda_handler' 246 | MemorySize: 2688 247 | Timeout: 600 248 | Role: !GetAtt LambdaRole.Arn 249 | Environment: 250 | Variables: 251 | BUCKET_NAME: !Ref DestinationBucket 252 | S3_GRANTS_PREFIX: !Ref GrantDataPrefix 253 | S3_LICENSES_PREFIX: !Ref LicenseDataPrefix 254 | PREFIX: !Ref CFDataName 255 | ROLE_NAME: !Ref ManagementRoleName 256 | Metadata: 257 | cfn_nag: 258 | rules_to_suppress: 259 | - id: W89 # Lambda functions should be deployed inside a VPC 260 | reason: "No need for VPC in this case" 261 | - id: W92 # Lambda functions should define ReservedConcurrentExecutions to reserve simultaneous executions 262 | reason: "No need for simultaneous execution" 263 | 264 | LogGroup: 265 | Type: AWS::Logs::LogGroup 266 | Properties: 267 | LogGroupName: !Sub "/aws/lambda/${LambdaFunction}" 268 | RetentionInDays: 60 269 | 270 | GrantsCrawler: 271 | Type: AWS::Glue::Crawler 272 | Properties: 273 | Name: !Sub '${ResourcePrefix}${CFDataName}-${GrantDataPrefix}-Crawler' 274 | Role: !Ref GlueRoleARN 275 | DatabaseName: !Ref DatabaseName 276 | Targets: 277 | S3Targets: 278 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${GrantDataPrefix}/" 279 | 280 | LicensesCrawler: 281 | Type: AWS::Glue::Crawler 282 | Properties: 283 | Name: !Sub '${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler' 284 | Role: !Ref GlueRoleARN 285 | DatabaseName: !Ref DatabaseName 286 | Targets: 287 | S3Targets: 288 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${LicenseDataPrefix}/" 289 | 290 | ModuleStepFunction: 291 | Type: AWS::StepFunctions::StateMachine 292 | Properties: 293 | StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine' 294 | StateMachineType: STANDARD 295 | RoleArn: !Ref StepFunctionExecutionRoleARN 296 | DefinitionS3Location: 297 | Bucket: !Ref CodeBucket 298 | Key: !Ref StepFunctionTemplate 299 | DefinitionSubstitutions: 300 | AccountCollectorLambdaARN: !Ref AccountCollectorLambdaARN 301 | ModuleLambdaARN: !GetAtt LambdaFunction.Arn 302 | Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler","${ResourcePrefix}${CFDataName}-${GrantDataPrefix}-Crawler"]' 303 | CollectionType: "Payers" 304 | Params: '' 305 | Module: !Ref CFDataName 306 | DeployRegion: !Ref AWS::Region 307 | Account: !Ref AWS::AccountId 308 | Prefix: !Ref ResourcePrefix 309 | Bucket: !Ref DestinationBucket 310 | 311 | ModuleRefreshSchedule: 312 | Type: 'AWS::Scheduler::Schedule' 313 | Properties: 314 
| Description: !Sub 'Scheduler for the ODC ${CFDataName} module' 315 | Name: !Sub '${ResourcePrefix}${CFDataName}-RefreshSchedule' 316 | ScheduleExpression: !Ref Schedule 317 | State: ENABLED 318 | FlexibleTimeWindow: 319 | MaximumWindowInMinutes: 30 320 | Mode: 'FLEXIBLE' 321 | Target: 322 | Arn: !GetAtt ModuleStepFunction.Arn 323 | RoleArn: !Ref SchedulerExecutionRoleARN 324 | 325 | AnalyticsExecutor: 326 | Type: Custom::LambdaAnalyticsExecutor 327 | Properties: 328 | ServiceToken: !Ref LambdaAnalyticsARN 329 | Name: !Ref CFDataName 330 | -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/bu_usage_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW bu_usage_view AS 2 | SELECT 3 | "bill_payer_account_id" 4 | , "line_item_product_code" 5 | , "line_item_usage_account_id" 6 | , "line_item_resource_id" 7 | , "task" 8 | , "resource_tags_aws_ecs_service_Name" 9 | , "line_item_usage_type" 10 | , "line_item_operation" 11 | , "sum"(CAST("sum_line_item_usage_amount" AS double)) "sum_line_item_usage_amount" 12 | , "cur"."month" 13 | , "cur"."year" 14 | , "cluster" 15 | , "services" 16 | , "servicearn" 17 | , "account_id" 18 | , "value" 19 | FROM 20 | (( 21 | SELECT 22 | "bill_payer_account_id" 23 | , "line_item_product_code" 24 | , "line_item_usage_account_id" 25 | , "line_item_resource_id" 26 | , "split"("line_item_resource_id", '/')[2] "task" 27 | , "resource_tags_aws_ecs_service_Name" 28 | , "line_item_usage_type" 29 | , "line_item_operation" 30 | , "sum"(CAST("line_item_usage_amount" AS double)) "sum_line_item_usage_amount" 31 | , "month" 32 | , "year" 33 | FROM 34 | ${CUR} 35 | WHERE ((("line_item_operation" = 'ECSTask-EC2') AND ("line_item_product_code" IN ('AmazonECS'))) AND ("line_item_usage_type" LIKE '%GB%')) 36 | GROUP BY "bill_payer_account_id", "line_item_usage_account_id", "line_item_product_code", "line_item_operation", "line_item_resource_id", "resource_tags_aws_ecs_service_Name", "line_item_usage_type", "line_item_operation", "month", "year" 37 | ) cur 38 | LEFT JOIN ( 39 | SELECT 40 | "cluster" 41 | , "services" 42 | , "servicearn" 43 | , "value" 44 | , "year" 45 | , "month" 46 | , "account_id" 47 | FROM 48 | cluster_metadata_view 49 | ) clusters_data ON ((("clusters_data"."account_id" = "cur"."line_item_usage_account_id") AND (("clusters_data"."services" = "cur"."resource_tags_aws_ecs_service_name") AND ("clusters_data"."year" = "cur"."year"))) AND ("clusters_data"."month" = "cur"."month"))) 50 | GROUP BY "bill_payer_account_id", "line_item_usage_account_id", "line_item_product_code", "line_item_operation", "line_item_resource_id", "resource_tags_aws_ecs_service_Name", "line_item_usage_type", "line_item_operation", "cur"."month", "cur"."year", "cluster", "services", "servicearn", "value", "task", "account_id" -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/cluster_metadata_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW cluster_metadata_view AS 2 | SELECT 3 | * 4 | , "tag"."value" 5 | FROM 6 | (ecs_services_clusters_data 7 | CROSS JOIN UNNEST("tags") t (tag)) 8 | WHERE ("tag"."key" = 'BU') -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/ec2_cluster_costs_view.sql: 
-------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW ec2_cluster_costs_view AS 2 | SELECT 3 | "line_item_product_code" 4 | , "line_item_usage_account_id" 5 | , "line_item_resource_id" 6 | , "line_item_usage_type" 7 | , "sum"((CASE WHEN ("line_item_line_item_type" = 'SavingsPlanCoveredUsage') THEN "line_item_usage_amount" WHEN ("line_item_line_item_type" = 'DiscountedUsage') THEN "line_item_usage_amount" WHEN ("line_item_line_item_type" = 'Usage') THEN "line_item_usage_amount" ELSE 0 END)) "sum_line_item_usage_amount" 8 | , "sum"("line_item_unblended_cost") "unblended_cost" 9 | , "sum"((CASE WHEN ("line_item_line_item_type" = 'SavingsPlanCoveredUsage') THEN "savings_plan_savings_plan_effective_cost" WHEN ("line_item_line_item_type" = 'SavingsPlanRecurringFee') THEN ("savings_plan_total_commitment_to_date" - "savings_plan_used_commitment") WHEN ("line_item_line_item_type" = 'SavingsPlanNegation') THEN 0 WHEN ("line_item_line_item_type" = 'SavingsPlanUpfrontFee') THEN 0 WHEN ("line_item_line_item_type" = 'DiscountedUsage') THEN "reservation_effective_cost" WHEN ("line_item_line_item_type" = 'RIFee') THEN ("reservation_unused_amortized_upfront_fee_for_billing_period" + "reservation_unused_recurring_fee") ELSE "line_item_unblended_cost" END)) "sum_line_item_amortized_cost" 10 | , "month" 11 | , "year" 12 | FROM 13 | ${CUR} 14 | WHERE (((product_product_name = 'Amazon Elastic Compute Cloud') AND (("resource_tags_user_name" LIKE '%ECS%') OR ("resource_tags_user_name" LIKE '%ecs%'))) AND ((("line_item_usage_type" LIKE '%BoxUsage%') OR ("line_item_usage_type" LIKE '%Spot%')) OR (line_item_usage_type LIKE '%%EBS%%Volume%%'))) 15 | GROUP BY "resource_tags_user_name", "line_item_product_code", "line_item_usage_account_id", "line_item_resource_id", "line_item_usage_type", "month", "year" 16 | -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/ecs_chargeback_report.sql: -------------------------------------------------------------------------------- 1 | -- FINAL 2 | SELECT bu_usage_view.line_item_usage_account_id, sum(sum_line_item_usage_amount) AS task_usage, total_usage, (sum(sum_line_item_usage_amount)/total_usage) as "percent", ec2_cost, ((sum(sum_line_item_usage_amount)/total_usage)*ec2_cost) as ecs_cost, 3 | "cluster", 4 | services, 5 | servicearn, 6 | value, 7 | bu_usage_view.month, 8 | bu_usage_view.year 9 | FROM "bu_usage_view" 10 | 11 | left join (select line_item_usage_account_id, sum(sum_line_item_usage_amount) as total_usage, year, month from "bu_usage_view" where "cluster" <> '' group by line_item_usage_account_id, year, month) sum 12 | on sum.line_item_usage_account_id = bu_usage_view.line_item_usage_account_id 13 | and sum.month=bu_usage_view.month 14 | and sum.year=bu_usage_view.year 15 | left join 16 | (SELECT line_item_usage_account_id, month, year, sum(sum_line_item_amortized_cost) as ec2_cost FROM "ec2_cluster_costs_view" group by line_item_usage_account_id,month,year) ec2_cost 17 | on ec2_cost.month=bu_usage_view.month 18 | and ec2_cost.year=bu_usage_view.year 19 | and ec2_cost.line_item_usage_account_id=bu_usage_view.line_item_usage_account_id 20 | where "cluster" <> '' 21 | and bu_usage_view.month = '6' -- if((date_format(current_timestamp , '%M') = 'January'),bu_usage_view.month = '12', bu_usage_view.month = CAST((month(now())-1) AS VARCHAR) ) 22 | and bu_usage_view.year = '2021' -- if((date_format(current_timestamp , '%M') = 'January'), 
bu_usage_view.year = CAST((year(now())-1) AS VARCHAR) ,bu_usage_view.year = CAST(year(now()) AS VARCHAR)) 23 | GROUP BY "cluster", services, servicearn, value, bu_usage_view.month, bu_usage_view.year, bu_usage_view.line_item_usage_account_id, total_usage, ec2_cost -------------------------------------------------------------------------------- /data-collection/deploy/source/partition_repair_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | This utility realigns new Athena table partitions to the original type in the event that an API call 3 | returns a datatype that does not match the type from the initial crawler run. 4 | 5 | Usage: 6 | Determine the name of your database and the table you wish to alter: 7 | 8 | python3 {prog} 9 | 10 | """ 11 | import sys 12 | import logging 13 | import boto3 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | def realign_partitions(database_name, table_name): 18 | logger.info(f"Realigning partitions for {database_name}.{table_name}") 19 | 20 | glue_client = boto3.client("glue") 21 | 22 | # Get the data types of the base table 23 | table_response = glue_client.get_table( 24 | DatabaseName=database_name, 25 | Name=table_name 26 | ) 27 | 28 | column_to_datatype = { 29 | item["Name"]: item["Type"] for item in table_response["Table"]["StorageDescriptor"]["Columns"] 30 | } 31 | 32 | # List partitions and datatypes 33 | partition_params = { 34 | "DatabaseName": database_name, 35 | "TableName": table_name, 36 | } 37 | response = glue_client.get_partitions(**partition_params) 38 | partitions = response["Partitions"] 39 | 40 | while "NextToken" in response: 41 | partition_params["NextToken"] = response["NextToken"] 42 | response = glue_client.get_partitions(**partition_params) 43 | 44 | partitions += response["Partitions"] 45 | 46 | logger.debug(f"Found {len(partitions)} partitions") 47 | 48 | partitions_to_update = [] 49 | for partition in partitions: 50 | changed = False 51 | columns = partition["StorageDescriptor"]["Columns"] 52 | new_columns = [] 53 | for column in columns: 54 | if column["Name"] in column_to_datatype and column["Type"] != column_to_datatype[column["Name"]]: 55 | changed = True 56 | logger.debug(f"Changing type of {column['Name']} from {column['Type']} to {column_to_datatype[column['Name']]}") 57 | column["Type"] = column_to_datatype[column["Name"]] 58 | new_columns.append(column) 59 | partition["StorageDescriptor"]["Columns"] = new_columns 60 | if changed: 61 | partitions_to_update.append(partition) 62 | 63 | logger.debug(f"{len(partitions_to_update)} partitions of table {table_name} will be updated.") 64 | 65 | # Update partitions if necessary 66 | for partition in partitions_to_update: 67 | logger.debug(f"Updating {', '.join(partition['Values'])}") 68 | partition.pop("CatalogId") 69 | partition.pop("CreationTime") 70 | glue_client.update_partition( 71 | DatabaseName=partition.pop("DatabaseName"), 72 | TableName=partition.pop("TableName"), 73 | PartitionValueList=partition['Values'], 74 | PartitionInput=partition 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | logging.basicConfig(level=logging.ERROR) 80 | logger.setLevel(logging.DEBUG) 81 | try: 82 | database_name = sys.argv[1] 83 | table_name = sys.argv[2] 84 | except: 85 | print(__doc__.format(prog=sys.argv[0])) 86 | exit(1) 87 | realign_partitions(database_name, table_name) -------------------------------------------------------------------------------- /data-collection/deploy/source/regions.csv: 
-------------------------------------------------------------------------------- 1 | Region,Region Name,Endpoint,Protocol 2 | us-east-2,US East (Ohio),rds.us-east-2.amazonaws.com,HTTPS 3 | us-east-1,US East (N. Virginia),rds.us-east-1.amazonaws.com,HTTPS 4 | us-west-1,US West (N. California),rds.us-west-1.amazonaws.com,HTTPS 5 | us-west-2,US West (Oregon),rds.us-west-2.amazonaws.com,HTTPS 6 | af-south-1,Africa (Cape Town),rds.af-south-1.amazonaws.com,HTTPS 7 | ap-east-1,Asia Pacific (Hong Kong),rds.ap-east-1.amazonaws.com,HTTPS 8 | ap-south-1,Asia Pacific (Mumbai),rds.ap-south-1.amazonaws.com,HTTPS 9 | ap-northeast-3,Asia Pacific (Osaka-Local),rds.ap-northeast-3.amazonaws.com,HTTPS 10 | ap-northeast-2,Asia Pacific (Seoul),rds.ap-northeast-2.amazonaws.com,HTTPS 11 | ap-southeast-1,Asia Pacific (Singapore),rds.ap-southeast-1.amazonaws.com,HTTPS 12 | ap-southeast-2,Asia Pacific (Sydney),rds.ap-southeast-2.amazonaws.com,HTTPS 13 | ap-northeast-1,Asia Pacific (Tokyo),rds.ap-northeast-1.amazonaws.com,HTTPS 14 | ca-central-1,Canada (Central),rds.ca-central-1.amazonaws.com,HTTPS 15 | cn-north-1,China (Beijing),rds.cn-north-1.amazonaws.com.cn,HTTPS 16 | cn-northwest-1,China (Ningxia),rds.cn-northwest-1.amazonaws.com.cn,HTTPS 17 | eu-central-1,EU (Frankfurt),rds.eu-central-1.amazonaws.com,HTTPS 18 | eu-west-1,EU (Ireland),rds.eu-west-1.amazonaws.com,HTTPS 19 | eu-west-2,EU (London),rds.eu-west-2.amazonaws.com,HTTPS 20 | eu-south-1,EU (Milan),rds.eu-south-1.amazonaws.com,HTTPS 21 | eu-west-3,EU (Paris),rds.eu-west-3.amazonaws.com,HTTPS 22 | eu-north-1,EU (Stockholm),rds.eu-north-1.amazonaws.com,HTTPS 23 | me-south-1,Middle East (Bahrain),rds.me-south-1.amazonaws.com,HTTPS 24 | sa-east-1,South America (São Paulo),rds.sa-east-1.amazonaws.com,HTTPS 25 | us-gov-east-1,AWS GovCloud (US-East),rds.us-gov-east-1.amazonaws.com,HTTPS 26 | us-gov-west-1,AWS GovCloud (US),rds.us-gov-west-1.amazonaws.com,HTTPS -------------------------------------------------------------------------------- /data-collection/deploy/source/s3_backwards_comp.py: -------------------------------------------------------------------------------- 1 | from turtle import pd 2 | import boto3 3 | import sys 4 | import logging 5 | #python3 s3_backwards_comp.py 6 | 7 | payer_id = sys.argv[1] 8 | your_bucket_name = sys.argv[2] 9 | 10 | client = boto3.client('s3') 11 | 12 | mods = ["ecs-chargeback-data/", "rds_metrics/rds_stats/", "budgets/", "rightsizing/","optics-data-collector/ami-data/","optics-data-collector/ebs-data/", "optics-data-collector/snapshot-data/","optics-data-collector/ta-data/", "Compute_Optimizer/Compute_Optimizer_ec2_instance/", "Compute_Optimizer/Compute_Optimizer_auto_scale/", "Compute_Optimizer/Compute_Optimizer_lambda/", "Compute_Optimizer/Compute_Optimizer_ebs_volume/", "reserveinstance/", "savingsplan/", "transitgateway/"] 13 | 14 | for mod in mods: 15 | print(mod) 16 | response = client.list_objects_v2(Bucket= your_bucket_name, Prefix = mod) 17 | try: 18 | for key in response['Contents']: 19 | source_key = key["Key"] 20 | if 'payer_id' not in source_key: 21 | x = source_key.split("/")[0] 22 | source_key_new = source_key.replace(mod, '') 23 | copy_source = {'Bucket': your_bucket_name, 'Key': source_key} 24 | client.copy_object(Bucket = your_bucket_name, CopySource = copy_source, Key = f"{mod}payer_id={payer_id}/{source_key_new}") 25 | client.delete_object(Bucket = your_bucket_name, Key = source_key) 26 | else: 27 | print(f"{source_key} has payer") 28 | except Exception as e: 29 | logging.warning("%s" % e) 30 | continue 
31 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/awsfeeds-state-machine-v1.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Execute Lambda and Crawler in standalone mode", 3 | "StartAt": "Lambda Invoke", 4 | "States": { 5 | "Lambda Invoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "OutputPath": "$.Payload", 9 | "Parameters": { 10 | "FunctionName": "${ModuleLambdaARN}" 11 | }, 12 | "Retry": [ 13 | { 14 | "ErrorEquals": [ 15 | "Lambda.ServiceException", 16 | "Lambda.AWSLambdaException", 17 | "Lambda.SdkClientException", 18 | "Lambda.TooManyRequestsException" 19 | ], 20 | "IntervalSeconds": 1, 21 | "MaxAttempts": 3, 22 | "BackoffRate": 2 23 | } 24 | ], 25 | "Next": "GetCrawler1" 26 | }, 27 | "GetCrawler1": { 28 | "Type": "Task", 29 | "Parameters": { 30 | "Name": "${Crawler}" 31 | }, 32 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 33 | "Next": "Choice1", 34 | "OutputPath": "$.Crawler" 35 | }, 36 | "Choice1": { 37 | "Type": "Choice", 38 | "Choices": [ 39 | { 40 | "Not": { 41 | "Variable": "$.State", 42 | "StringEquals": "READY" 43 | }, 44 | "Next": "Wait for Crawler to be ready" 45 | } 46 | ], 47 | "Default": "StartCrawler" 48 | }, 49 | "Wait for Crawler to be ready": { 50 | "Type": "Wait", 51 | "Seconds": 15, 52 | "Next": "GetCrawler1" 53 | }, 54 | "StartCrawler": { 55 | "Type": "Task", 56 | "Parameters": { 57 | "Name": "${Crawler}" 58 | }, 59 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 60 | "Next": "Wait for Crawler Execution" 61 | }, 62 | "Wait for Crawler Execution": { 63 | "Type": "Wait", 64 | "Seconds": 15, 65 | "Next": "GetCrawler2" 66 | }, 67 | "GetCrawler2": { 68 | "Type": "Task", 69 | "Parameters": { 70 | "Name": "${Crawler}" 71 | }, 72 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 73 | "Next": "Choice2", 74 | "OutputPath": "$.Crawler" 75 | }, 76 | "Choice2": { 77 | "Type": "Choice", 78 | "Choices": [ 79 | { 80 | "Not": { 81 | "Variable": "$.State", 82 | "StringEquals": "READY" 83 | }, 84 | "Next": "Wait for Crawler Execution" 85 | } 86 | ], 87 | "Default": "Completed" 88 | }, 89 | "Completed": { 90 | "Type": "Pass", 91 | "End": true 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/crawler-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the Glue Crawlers for the collected data", 3 | "StartAt": "SetVariables", 4 | "States": { 5 | "SetVariables": { 6 | "Type": "Pass", 7 | "Next": "CrawlerMap", 8 | "QueryLanguage": "JSONata", 9 | "Output": { 10 | "crawlers": "{% $states.input.crawlers %}" 11 | }, 12 | "Assign": { 13 | "behavior": "{% $states.input.behavior %}" 14 | } 15 | }, 16 | "CrawlerMap": { 17 | "Type": "Map", 18 | "ItemProcessor": { 19 | "ProcessorConfig": { 20 | "Mode": "INLINE" 21 | }, 22 | "StartAt": "GetCrawler", 23 | "States": { 24 | "GetCrawler": { 25 | "Type": "Task", 26 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 27 | "Retry": [ 28 | { 29 | "ErrorEquals": [ 30 | "States.ALL" 31 | ], 32 | "BackoffRate": 2, 33 | "IntervalSeconds": 1, 34 | "MaxAttempts": 8, 35 | "JitterStrategy": "FULL" 36 | } 37 | ], 38 | "Next": "IsReady", 39 | "QueryLanguage": "JSONata", 40 | "Arguments": { 41 | "Name": "{% $states.input %}" 42 | }, 43 | "Output": { 44 | "Name": "{% 
$states.result.Crawler.Name %}", 45 | "State": "{% $states.result.Crawler.State %}" 46 | } 47 | }, 48 | "IsReady": { 49 | "Type": "Choice", 50 | "Default": "WaitForCrawler", 51 | "Choices": [ 52 | { 53 | "Next": "StartCrawler", 54 | "Condition": "{% $states.input.State = 'READY' %}", 55 | "Output": { 56 | "Name": "{% $states.input.Name %}" 57 | } 58 | }, 59 | { 60 | "Next": "NotReadyNoWait", 61 | "Condition": "{% $states.input.State != 'READY' and $behavior = 'NOWAIT' %}" 62 | } 63 | ], 64 | "QueryLanguage": "JSONata", 65 | "Output": { 66 | "Name": "{% $states.input.Name %}" 67 | } 68 | }, 69 | "WaitForCrawler": { 70 | "Type": "Wait", 71 | "Seconds": 30, 72 | "Next": "GetCrawler", 73 | "QueryLanguage": "JSONata", 74 | "Output": "{% $states.input.Name %}" 75 | }, 76 | "StartCrawler": { 77 | "Type": "Task", 78 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 79 | "End": true, 80 | "Retry": [ 81 | { 82 | "ErrorEquals": [ 83 | "States.ALL" 84 | ], 85 | "BackoffRate": 2, 86 | "IntervalSeconds": 1, 87 | "MaxAttempts": 8, 88 | "JitterStrategy": "FULL" 89 | } 90 | ], 91 | "QueryLanguage": "JSONata", 92 | "Arguments": { 93 | "Name": "{% $states.input.Name %}" 94 | } 95 | }, 96 | "NotReadyNoWait": { 97 | "Type": "Succeed", 98 | "QueryLanguage": "JSONata" 99 | } 100 | } 101 | }, 102 | "End": true, 103 | "QueryLanguage": "JSONata", 104 | "Items": "{% $states.input.crawlers %}" 105 | } 106 | }, 107 | "TimeoutSeconds": 1200 108 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/health-detail-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Collects Health Events", 3 | "StartAt": "SetGlobalVariables1", 4 | "States": { 5 | "SetGlobalVariables1": { 6 | "Type": "Pass", 7 | "QueryLanguage": "JSONata", 8 | "Assign": { 9 | "MODULE": "${Module}", 10 | "PARAMS": "", 11 | "DATA_COLLECTION_REGION": "${DeployRegion}", 12 | "DATA_COLLECTION_ACCOUNT": "${Account}", 13 | "PREFIX": "${Prefix}", 14 | "BUCKET": "${Bucket}", 15 | "CRAWLERS": ${Crawlers}, 16 | "EXE_UUID": "{% $states.input.main_exe_uuid %}", 17 | "CHILD_UUID": "{% $split($states.context.Execution.Id, ':')[7] %}", 18 | "EXE_START_TIME_SPLIT": "{% $split($states.context.Execution.StartTime, /[-:T.]/) %}", 19 | "MAP_KEY": "{% $states.input.file %}", 20 | "ACCOUNT": "{% $states.input.account %}", 21 | "INGEST_TIME": "{% $states.input.ingestion_time %}", 22 | "STACK_VERSION": "{% $states.input.stack_version %}" 23 | }, 24 | "Next": "SetGlobalVariables2" 25 | }, 26 | "SetGlobalVariables2": { 27 | "Type": "Pass", 28 | "QueryLanguage": "JSONata", 29 | "Assign": { 30 | "LOG_KEY_BASE": "{% 'logs/modules/'&$EXE_START_TIME_SPLIT[0]&'/'&$EXE_START_TIME_SPLIT[1]&'/'&$EXE_START_TIME_SPLIT[2]&'/'&$MODULE&'detail-sf-' %}", 31 | "CRAWLER_STATE_MACHINE": "{% 'arn:aws:states:'&$DATA_COLLECTION_REGION&':'&$DATA_COLLECTION_ACCOUNT&':stateMachine:'&$PREFIX&'CrawlerExecution-StateMachine' %}", 32 | "SUB_UUID": { 33 | "child-state-machine-exid": "{% $CHILD_UUID %}", 34 | "statemachine-id": "{% $states.context.StateMachine.Id %}" 35 | } 36 | }, 37 | "Next": "EntryLog" 38 | }, 39 | "EntryLog": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 42 | "QueryLanguage": "JSONata", 43 | "Arguments": { 44 | "Bucket": "{% $BUCKET %}", 45 | "Key": "{% $LOG_KEY_BASE&'entry-'&$EXE_UUID&'.json' %}", 46 | "Body": { 47 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 48 | "DataCollectionRegion": "{% 
$DATA_COLLECTION_REGION %}", 49 | "DataCollectionAccountId": "{% $DATA_COLLECTION_ACCOUNT %}", 50 | "Module": "{% $MODULE %}", 51 | "ModuleFunction": "child-sf-entry", 52 | "Params": "{% $PARAMS %}", 53 | "PayerId": "", 54 | "AccountId": "", 55 | "Region": "", 56 | "StatusCode": 200, 57 | "SubCode": "", 58 | "RecordCount": 0, 59 | "Description": "Child Step Function entered", 60 | "DataLocation": "", 61 | "MainExeUuid": "{% $EXE_UUID %}", 62 | "SubUuid": "{% $SUB_UUID %}", 63 | "Service": "StepFunction", 64 | "StackVersion": "{% $STACK_VERSION %}", 65 | "SubVersion": "" 66 | } 67 | }, 68 | "Retry": [ 69 | { 70 | "ErrorEquals": [ 71 | "States.ALL" 72 | ], 73 | "BackoffRate": 2, 74 | "IntervalSeconds": 1, 75 | "MaxAttempts": 3 76 | } 77 | ], 78 | "Next": "DataCollectionMap" 79 | }, 80 | "DataCollectionMap": { 81 | "Type": "Map", 82 | "QueryLanguage": "JSONata", 83 | "ItemReader": { 84 | "Resource": "arn:aws:states:::s3:getObject", 85 | "ReaderConfig": { 86 | "InputType": "CSV", 87 | "CSVHeaderLocation": "FIRST_ROW" 88 | }, 89 | "Arguments": { 90 | "Bucket": "{% $BUCKET %}", 91 | "Key": "{% $MAP_KEY %}" 92 | } 93 | }, 94 | "MaxConcurrency": 1, 95 | "ItemBatcher": { 96 | "MaxItemsPerBatch": 500, 97 | "BatchInput": { 98 | "account": "{% $ACCOUNT %}", 99 | "ingestion_time": "{% $INGEST_TIME %}", 100 | "main_exe_uuid": "{% $EXE_UUID %}", 101 | "sub_uuid": "{% $SUB_UUID %}", 102 | "bucket": "{% $BUCKET %}", 103 | "dc_account": "{% $DATA_COLLECTION_ACCOUNT %}", 104 | "dc_region": "{% $DATA_COLLECTION_REGION %}", 105 | "module": "{% $MODULE %}", 106 | "prefix": "{% $PREFIX %}", 107 | "log_key_base": "{% $LOG_KEY_BASE %}", 108 | "params": "{% $PARAMS %}", 109 | "stack_version": "{% $STACK_VERSION %}" 110 | } 111 | }, 112 | "ItemProcessor": { 113 | "ProcessorConfig": { 114 | "Mode": "DISTRIBUTED", 115 | "ExecutionType": "STANDARD" 116 | }, 117 | "StartAt": "DataCollectionLambda", 118 | "States": { 119 | "DataCollectionLambda": { 120 | "Type": "Task", 121 | "QueryLanguage": "JSONata", 122 | "Resource": "arn:aws:states:::lambda:invoke", 123 | "Arguments": { 124 | "FunctionName": "{% 'arn:aws:lambda:'&$states.input.BatchInput.dc_region&':'&$states.input.BatchInput.dc_account&':function:'&$states.input.BatchInput.prefix&$states.input.BatchInput.module&'-Lambda' %}", 125 | "Payload": { 126 | "account": "{% $states.input.BatchInput.account %}", 127 | "main_exe_uuid": "{% $states.input.BatchInput.main_exe_uuid %}", 128 | "sub_uuid": "{% $states.input.BatchInput.sub_uuid %}", 129 | "params": "{% $states.input.BatchInput.params %}", 130 | "ingestion_time": "{% $states.input.BatchInput.ingestion_time %}", 131 | "stack_version": "{% $states.input.BatchInput.stack_version %}", 132 | "items": "{% $states.input.Items %}" 133 | } 134 | }, 135 | "Catch": [ 136 | { 137 | "ErrorEquals": [ 138 | "States.ALL" 139 | ], 140 | "Output": { 141 | "account": "{% $states.input.BatchInput.account %}", 142 | "main_exe_uuid": "{% $states.input.BatchInput.main_exe_uuid %}", 143 | "sub_uuid": "{% $merge([$states.input.BatchInput.sub_uuid, {'map-state-machine-exid': $split($states.context.Execution.Id, ':')[7]}]) %}", 144 | "module": "{% $states.input.BatchInput.module %}", 145 | "bucket": "{% $states.input.BatchInput.bucket %}", 146 | "dc_account": "{% $states.input.BatchInput.dc_account %}", 147 | "dc_region": "{% $states.input.BatchInput.dc_region %}", 148 | "log_key_base": "{% $states.input.BatchInput.log_key_base %}", 149 | "params": "{% $states.input.BatchInput.params %}", 150 | "stack_version": "{% 
$states.input.BatchInput.stack_version %}", 151 | "description": "{% $states.errorOutput %}" 152 | }, 153 | "Next": "DCLambdaErrorMetric" 154 | } 155 | ], 156 | "Retry": [ 157 | { 158 | "ErrorEquals": [ 159 | "Lambda.TooManyRequestsException" 160 | ], 161 | "IntervalSeconds": 2, 162 | "MaxAttempts": 6, 163 | "BackoffRate": 2, 164 | "JitterStrategy": "FULL" 165 | } 166 | ], 167 | "End": true 168 | }, 169 | "DCLambdaErrorMetric": { 170 | "Type": "Task", 171 | "QueryLanguage": "JSONata", 172 | "Resource": "arn:aws:states:::aws-sdk:cloudwatch:putMetricData", 173 | "Arguments": { 174 | "Namespace": "CID-DataCollection", 175 | "MetricData": [ 176 | { 177 | "MetricName": "Error", 178 | "Value": 1, 179 | "Unit": "Count", 180 | "Dimensions": [ 181 | { 182 | "Name": "Module", 183 | "Value": "{% $states.input.module %}" 184 | } 185 | ] 186 | } 187 | ] 188 | }, 189 | "Output": { 190 | "account": "{% $states.input.account %}", 191 | "main_exe_uuid": "{% $states.input.main_exe_uuid %}", 192 | "sub_uuid": "{% $states.input.sub_uuid %}", 193 | "description": "{% $states.input.description %}", 194 | "module": "{% $states.input.module %}", 195 | "bucket": "{% $states.input.bucket %}", 196 | "dc_account": "{% $states.input.dc_account %}", 197 | "dc_region": "{% $states.input.dc_region %}", 198 | "log_key_base": "{% $states.input.log_key_base %}", 199 | "params": "{% $states.input.params %}", 200 | "stack_version": "{% $states.input.stack_version %}" 201 | }, 202 | "Next": "DCLambdaErrorLog" 203 | }, 204 | "DCLambdaErrorLog": { 205 | "Type": "Task", 206 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 207 | "QueryLanguage": "JSONata", 208 | "Arguments": { 209 | "Bucket": "{% $states.input.bucket %}", 210 | "Key": "{% $states.input.log_key_base&'-'&$random()&'.json' %}", 211 | "Body": { 212 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 213 | "DataCollectionRegion": "{% $states.input.dc_region %}", 214 | "DataCollectionAccountId": "{% $states.input.dc_account %}", 215 | "Module": "{% $states.input.module %}", 216 | "ModuleFunction": "sf-dc-lambda-error-log", 217 | "Params": "{% $states.input.params %}", 218 | "PayerId": "{% $states.input.account.payer_id %}", 219 | "AccountId": "", 220 | "Region": "", 221 | "StatusCode": 500, 222 | "SubCode": "", 223 | "RecordCount": 0, 224 | "Description": "{% $states.input.description %}", 225 | "DataLocation": "", 226 | "MainExeUuid": "{% $states.input.main_exe_uuid %}", 227 | "SubUuid": "{% $states.input.sub_uuid %}", 228 | "Service": "StepFunction", 229 | "StackVersion": "{% $states.input.stack_version %}", 230 | "SubVersion": "" 231 | } 232 | }, 233 | "Output": { 234 | "description": "{% $states.input.description %}" 235 | }, 236 | "Retry": [ 237 | { 238 | "ErrorEquals": [ 239 | "States.ALL" 240 | ], 241 | "BackoffRate": 2, 242 | "IntervalSeconds": 1, 243 | "MaxAttempts": 3 244 | } 245 | ], 246 | "Next": "FailMap" 247 | }, 248 | "FailMap": { 249 | "Type": "Fail", 250 | "QueryLanguage": "JSONata", 251 | "Error": "MapLambdaExecutionError", 252 | "Cause": "Error in Detail Lambda trapped. See logs in your Data Collection bucket." 
253 | } 254 | } 255 | }, 256 | "Output": { 257 | "status_code": 200, 258 | "description": "Health Events detail Map task completed successfully" 259 | }, 260 | "Catch": [ 261 | { 262 | "ErrorEquals": [ 263 | "States.ALL" 264 | ], 265 | "Output": { 266 | "status_code": 500, 267 | "description": "{% $states.errorOutput %}" 268 | }, 269 | "Next": "MapErrorMetric" 270 | } 271 | ], 272 | "Next": "CrawlerStepFunction" 273 | }, 274 | "MapErrorMetric": { 275 | "Type": "Task", 276 | "QueryLanguage": "JSONata", 277 | "Resource": "arn:aws:states:::aws-sdk:cloudwatch:putMetricData", 278 | "Arguments": { 279 | "Namespace": "CID-DataCollection", 280 | "MetricData": [ 281 | { 282 | "MetricName": "Error", 283 | "Value": 1, 284 | "Unit": "Count", 285 | "Dimensions": [ 286 | { 287 | "Name": "Module", 288 | "Value": "{% $MODULE %}" 289 | } 290 | ] 291 | } 292 | ] 293 | }, 294 | "Assign": { 295 | "ExecutionStatus": 500, 296 | "Description": "Child Step Function AccountMap failed most or all executions" 297 | }, 298 | "Next": "ExitLog" 299 | }, 300 | "CrawlerStepFunction": { 301 | "Type": "Task", 302 | "QueryLanguage": "JSONata", 303 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 304 | "Arguments": { 305 | "StateMachineArn": "{% $CRAWLER_STATE_MACHINE %}", 306 | "Input": { 307 | "behavior": "WAIT", 308 | "crawlers": "{% $CRAWLERS %}" 309 | } 310 | }, 311 | "Assign": { 312 | "ExecutionStatus": 200, 313 | "Description": "Child Step Function execution completed successfully" 314 | }, 315 | "Catch": [ 316 | { 317 | "ErrorEquals": [ 318 | "States.ALL" 319 | ], 320 | "Assign": { 321 | "ExecutionStatus": 500, 322 | "Description": "{% 'Child Step Function AccountMap failed with error: '&$states.errorOutput %}" 323 | }, 324 | "Next": "ExitLog" 325 | } 326 | ], 327 | "Next": "ExitLog" 328 | }, 329 | "ExitLog": { 330 | "Type": "Task", 331 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 332 | "QueryLanguage": "JSONata", 333 | "Arguments": { 334 | "Bucket": "{% $BUCKET %}", 335 | "Key": "{% $LOG_KEY_BASE&'exit-'&$EXE_UUID&'.json' %}", 336 | "Body": { 337 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 338 | "DataCollectionRegion": "{% $DATA_COLLECTION_REGION %}", 339 | "DataCollectionAccountId": "{% $DATA_COLLECTION_ACCOUNT %}", 340 | "Module": "{% $MODULE %}", 341 | "ModuleFunction": "child-sf-exit", 342 | "Params": "{% $PARAMS %}", 343 | "PayerId": "{% $ACCOUNT.payer_id %}", 344 | "AccountId": "", 345 | "Region": "", 346 | "StatusCode": "{% $ExecutionStatus %}", 347 | "SubCode": "", 348 | "RecordCount": 0, 349 | "Description": "{% $Description %}", 350 | "DataLocation": "", 351 | "MainExeUuid": "{% $EXE_UUID %}", 352 | "SubUuid": "{% $SUB_UUID %}", 353 | "Service": "StepFunction", 354 | "StackVersion": "{% $STACK_VERSION %}", 355 | "SubVersion": "" 356 | } 357 | }, 358 | "Retry": [ 359 | { 360 | "ErrorEquals": [ 361 | "States.ALL" 362 | ], 363 | "BackoffRate": 2, 364 | "IntervalSeconds": 1, 365 | "MaxAttempts": 3 366 | } 367 | ], 368 | "Next": "IsError" 369 | }, 370 | "IsError": { 371 | "Type": "Choice", 372 | "Choices": [ 373 | { 374 | "Condition": "{% $ExecutionStatus >= 500 %}", 375 | "Next": "Fail" 376 | } 377 | ], 378 | "QueryLanguage": "JSONata", 379 | "Default": "Success" 380 | }, 381 | "Success": { 382 | "Type": "Succeed" 383 | }, 384 | "Fail": { 385 | "Type": "Fail" 386 | } 387 | }, 388 | "TimeoutSeconds": 10800 389 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/main-state-machine-v2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the collection of ${Module} data", 3 | "StartAt": "AccountCollectorInvoke", 4 | "States": { 5 | "AccountCollectorInvoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "Parameters": { 9 | "Payload": { 10 | "Type": "${CollectionType}" 11 | }, 12 | "FunctionName": "${AccountCollectorLambdaARN}" 13 | }, 14 | "Retry": [ 15 | { 16 | "ErrorEquals": [ 17 | "Lambda.ServiceException", 18 | "Lambda.AWSLambdaException", 19 | "Lambda.SdkClientException", 20 | "Lambda.TooManyRequestsException" 21 | ], 22 | "IntervalSeconds": 2, 23 | "MaxAttempts": 6, 24 | "BackoffRate": 2 25 | } 26 | ], 27 | "Next": "AccountMap", 28 | "ResultPath": "$.accountLambdaOutput" 29 | }, 30 | "AccountMap": { 31 | "Type": "Map", 32 | "ItemProcessor": { 33 | "ProcessorConfig": { 34 | "Mode": "DISTRIBUTED", 35 | "ExecutionType": "STANDARD" 36 | }, 37 | "StartAt": "InvokeModuleLambda", 38 | "States": { 39 | "InvokeModuleLambda": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:${DeployRegion}:${Account}:lambda:invoke", 42 | "OutputPath": "$.Payload", 43 | "Parameters": { 44 | "Payload": { 45 | "account.$": "$.account", 46 | "params": "${Params}" 47 | }, 48 | "FunctionName": "${ModuleLambdaARN}" 49 | }, 50 | "Retry": [ 51 | { 52 | "ErrorEquals": [ 53 | "Lambda.ServiceException", 54 | "Lambda.AWSLambdaException", 55 | "Lambda.SdkClientException", 56 | "Lambda.TooManyRequestsException" 57 | ], 58 | "IntervalSeconds": 2, 59 | "MaxAttempts": 6, 60 | "BackoffRate": 2 61 | } 62 | ], 63 | "End": true 64 | } 65 | } 66 | }, 67 | "MaxConcurrency": 60, 68 | "ItemReader": { 69 | "Resource": "arn:aws:states:::s3:getObject", 70 | "ReaderConfig": { 71 | "InputType": "JSON" 72 | }, 73 | "Parameters": { 74 | "Bucket.$": "$.accountLambdaOutput.Payload.bucket", 75 | "Key.$": "$.accountLambdaOutput.Payload.accountList" 76 | } 77 | }, 78 | "Next": "CrawlerStepFunctionStartExecution" 79 | }, 80 | "CrawlerStepFunctionStartExecution": { 81 | "Type": "Task", 82 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 83 | "Parameters": { 84 | "StateMachineArn": "arn:aws:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine", 85 | "Input": { 86 | "crawlers": ${Crawlers} 87 | } 88 | }, 89 | "End": true 90 | } 91 | }, 92 | "TimeoutSeconds": 10800 93 | } 94 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/main-state-machine-v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the collection of ${Module} data", 3 | "StartAt": "AccountCollectorInvoke", 4 | "States": { 5 | "AccountCollectorInvoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "Parameters": { 9 | "Payload": { 10 | "Type": "${CollectionType}" 11 | }, 12 | "FunctionName": "${AccountCollectorLambdaARN}" 13 | }, 14 | "Retry": [ 15 | { 16 | "ErrorEquals": [ 17 | "Lambda.ServiceException", 18 | "Lambda.AWSLambdaException", 19 | "Lambda.SdkClientException", 20 | "Lambda.TooManyRequestsException" 21 | ], 22 | "IntervalSeconds": 2, 23 | "MaxAttempts": 6, 24 | "BackoffRate": 2 25 | } 26 | ], 27 | "Next": "AccountMap", 28 | "ResultPath": "$.accountLambdaOutput" 29 | }, 30 | "AccountMap": { 31 | "Type": "Map", 32 | "ItemProcessor": { 33 | "ProcessorConfig": { 34 | "Mode": "DISTRIBUTED", 35 | "ExecutionType": "STANDARD" 36 | }, 37 | "StartAt": 
"InvokeModuleLambda", 38 | "States": { 39 | "InvokeModuleLambda": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:${DeployRegion}:${Account}:lambda:invoke", 42 | "OutputPath": "$.Payload", 43 | "Parameters": { 44 | "Payload": { 45 | "account.$": "$.account", 46 | "params": "${Params}" 47 | }, 48 | "FunctionName": "${ModuleLambdaARN}" 49 | }, 50 | "Retry": [ 51 | { 52 | "ErrorEquals": [ 53 | "Lambda.ServiceException", 54 | "Lambda.AWSLambdaException", 55 | "Lambda.SdkClientException", 56 | "Lambda.TooManyRequestsException" 57 | ], 58 | "IntervalSeconds": 2, 59 | "MaxAttempts": 6, 60 | "BackoffRate": 2 61 | } 62 | ], 63 | "End": true 64 | } 65 | } 66 | }, 67 | "MaxConcurrency": 60, 68 | "ItemReader": { 69 | "Resource": "arn:aws:states:::s3:getObject", 70 | "ReaderConfig": { 71 | "InputType": "JSON" 72 | }, 73 | "Parameters": { 74 | "Bucket.$": "$.accountLambdaOutput.Payload.bucket", 75 | "Key.$": "$.accountLambdaOutput.Payload.accountList" 76 | } 77 | }, 78 | "Next": "CrawlerStepFunctionStartExecution" 79 | }, 80 | "CrawlerStepFunctionStartExecution": { 81 | "Type": "Task", 82 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 83 | "Parameters": { 84 | "StateMachineArn": "arn:aws:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine", 85 | "Input": { 86 | "crawlers": ${Crawlers} 87 | } 88 | }, 89 | "End": true 90 | } 91 | }, 92 | "TimeoutSeconds": 10800 93 | } 94 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/standalone-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Execute Lambda and Crawler in standalone mode", 3 | "StartAt": "Lambda Invoke", 4 | "States": { 5 | "Lambda Invoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "OutputPath": "$.Payload", 9 | "Parameters": { 10 | "FunctionName": "${ModuleLambdaARN}" 11 | }, 12 | "Retry": [ 13 | { 14 | "ErrorEquals": [ 15 | "Lambda.ServiceException", 16 | "Lambda.AWSLambdaException", 17 | "Lambda.SdkClientException", 18 | "Lambda.TooManyRequestsException" 19 | ], 20 | "IntervalSeconds": 1, 21 | "MaxAttempts": 3, 22 | "BackoffRate": 2 23 | } 24 | ], 25 | "Next": "GetCrawler1" 26 | }, 27 | "GetCrawler1": { 28 | "Type": "Task", 29 | "Parameters": { 30 | "Name": "${Crawler}" 31 | }, 32 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 33 | "Next": "Choice1", 34 | "OutputPath": "$.Crawler", 35 | "Retry": [ 36 | { 37 | "ErrorEquals": [ 38 | "Glue.ThrottlingException" 39 | ], 40 | "BackoffRate": 2, 41 | "IntervalSeconds": 5, 42 | "MaxAttempts": 5, 43 | "JitterStrategy": "FULL" 44 | } 45 | ] 46 | }, 47 | "Choice1": { 48 | "Type": "Choice", 49 | "Choices": [ 50 | { 51 | "Not": { 52 | "Variable": "$.State", 53 | "StringEquals": "READY" 54 | }, 55 | "Next": "Wait for Crawler to be ready" 56 | } 57 | ], 58 | "Default": "StartCrawler" 59 | }, 60 | "Wait for Crawler to be ready": { 61 | "Type": "Wait", 62 | "Seconds": 60, 63 | "Next": "GetCrawler1" 64 | }, 65 | "StartCrawler": { 66 | "Type": "Task", 67 | "Parameters": { 68 | "Name": "${Crawler}" 69 | }, 70 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 71 | "Next": "Wait for Crawler Execution", 72 | "Retry": [ 73 | { 74 | "ErrorEquals": [ 75 | "Glue.ThrottlingException" 76 | ], 77 | "BackoffRate": 2, 78 | "IntervalSeconds": 5, 79 | "MaxAttempts": 5, 80 | "JitterStrategy": "FULL" 81 | } 82 | ] 83 | }, 84 | "Wait for Crawler Execution": { 85 | "Type": 
"Wait", 86 | "Seconds": 60, 87 | "Next": "GetCrawler2" 88 | }, 89 | "GetCrawler2": { 90 | "Type": "Task", 91 | "Parameters": { 92 | "Name": "${Crawler}" 93 | }, 94 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 95 | "Next": "Choice2", 96 | "OutputPath": "$.Crawler", 97 | "Retry": [ 98 | { 99 | "ErrorEquals": [ 100 | "Glue.ThrottlingException" 101 | ], 102 | "BackoffRate": 2, 103 | "IntervalSeconds": 5, 104 | "MaxAttempts": 5, 105 | "JitterStrategy": "FULL" 106 | } 107 | ] 108 | }, 109 | "Choice2": { 110 | "Type": "Choice", 111 | "Choices": [ 112 | { 113 | "Not": { 114 | "Variable": "$.State", 115 | "StringEquals": "READY" 116 | }, 117 | "Next": "Wait for Crawler Execution" 118 | } 119 | ], 120 | "Default": "Completed" 121 | }, 122 | "Completed": { 123 | "Type": "Pass", 124 | "End": true 125 | } 126 | } 127 | } -------------------------------------------------------------------------------- /data-collection/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import git 4 | import json 5 | 6 | repo = git.Repo('.') 7 | 8 | print(repo.git.execute('git checkout main'.split())) 9 | print(repo.git.execute('git pull'.split())) 10 | 11 | 12 | old_ver = json.load(open("data-collection/utils/version.json"))['version'] 13 | 14 | print (old_ver) 15 | bump='patch' 16 | if len(sys.argv)>1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump=='patch': 22 | new_ver = '.'.join(map(str,[maj, minor, patch + 1])) 23 | elif bump=='minor': 24 | new_ver = '.'.join(map(str,[maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = open("data-collection/utils/version.json").read() 32 | with open("data-collection/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver,new_ver)) 34 | 35 | 36 | filenames = [ 37 | 'data-collection/deploy/deploy-data-read-permissions.yaml', 38 | 'data-collection/deploy/deploy-data-collection.yaml', 39 | 'data-collection/deploy/deploy-in-management-account.yaml', 40 | 'data-collection/deploy/deploy-in-linked-account.yaml', 41 | 'data-collection/deploy/source/step-functions/main-state-machine.json', 42 | "data-collection/utils/version.json", 43 | ] 44 | for filename in filenames: 45 | tx = open(filename).read() 46 | with open(filename, "w") as f: 47 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 48 | 49 | 50 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 51 | 52 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 53 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 54 | -------------------------------------------------------------------------------- /data-collection/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export AWS_REGION=us-east-1 6 | export STACK_SET_NAME=LayerBuckets 7 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 8 | 9 | code_path=$(git rev-parse --show-toplevel)/data-collection/deploy 10 | version=v$(jq -r '.version' data-collection/utils/version.json) 11 | 12 | echo "sync to central bucket" 13 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/data-collection/ 14 | aws s3 sync $code_path/ 
s3://$CENTRAL_BUCKET/cfn/data-collection/$version/ 15 | 16 | echo "sync to regional bucket with version prefix" 17 | aws cloudformation list-stack-instances \ 18 | --stack-set-name $STACK_SET_NAME \ 19 | --query 'Summaries[].[StackId,Region]' \ 20 | --output text | 21 | while read stack_id region; do 22 | echo "sync to $region" 23 | bucket=$(aws cloudformation list-stack-resources --stack-name $stack_id \ 24 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 25 | --region $region --output text) 26 | 27 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/ 28 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/$version/ --delete 29 | done 30 | 31 | echo 'Done' 32 | -------------------------------------------------------------------------------- /data-collection/utils/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script uploads CloudFormation files to S3 bucket. Can be used with any testing bucket or prod. 4 | # see also README.md 5 | 6 | if [ -n "$1" ]; then 7 | bucket=$1 8 | else 9 | echo "ERROR: First parameter not supplied. Provide a bucket name." 10 | exit 1 11 | fi 12 | code_path=$(git rev-parse --show-toplevel)/data-collection/deploy 13 | version=$(jq -r '.version' data-collection/utils/version.json) 14 | 15 | echo "Sync to $bucket" 16 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/v$version/ --delete 17 | echo 'Done' 18 | -------------------------------------------------------------------------------- /data-collection/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "3.11.0" 3 | } -------------------------------------------------------------------------------- /data-exports/README.md: -------------------------------------------------------------------------------- 1 | # Data Exports and Legacy CUR 2 | 3 | ## Table of Contents 4 | - [Introduction](#introduction) 5 | - [Data Exports](#data-exports) 6 | - [Basic Architecture](#basic-architecture-of-data-exports) 7 | - [Advanced Architecture](#advanced-architecture-of-data-exports) 8 | - [Legacy Cost and Usage Report](#legacy-cost-and-usage-report) 9 | - [FAQ](#faq) 10 | 11 | ## Introduction 12 | This readme contains description of solutions for AWS Data Exports and Legacy CUR replication and consolidation across multiple accounts. This is a part of Cloud Intelligence Dashboards and it is recommended by [AWS Data Exports official documentation](https://docs.aws.amazon.com/cur/latest/userguide/dataexports-processing.html). 13 | 14 | ## Data Exports 15 | 16 | For deployment instructions, please refer to the documentation at: https://catalog.workshops.aws/awscid/data-exports. 17 | 18 | Check code here: [data-exports-aggregation.yaml](deploy/data-exports-aggregation.yaml) 19 | 20 | 21 | ### Basic Architecture of Data Exports 22 | ![Basic Architecture of Data Exports](/.images/architecture-data-exports.png "Basic Architecture of Data Exports") 23 | 24 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) delivers daily Cost & Usage Report (CUR2) and other reports to an [Amazon S3 Bucket](https://aws.amazon.com/s3/) in the Management Account. 25 | 2. [Amazon S3](https://aws.amazon.com/s3/) replication rule copies Export data to a dedicated Data Collection Account S3 bucket automatically. 26 | 3. 
[Amazon Athena](https://aws.amazon.com/athena/) allows querying data directly from the S3 bucket using an [AWS Glue](https://aws.amazon.com/glue/) table schema definition. 27 | 4. [Amazon QuickSight](https://aws.amazon.com/quicksight/) datasets can read from [Amazon Athena](https://aws.amazon.com/athena/). Check Cloud Intelligence Dashboards for more details. 28 | 29 | ### Advanced Architecture of Data Exports 30 | For customers with additional requirements, an enhanced architecture is available: 31 | 32 | ![Advanced Architecture of Data Exports](/.images/architecture-data-exports-advanced.png "Advanced Architecture of Data Exports") 33 | 34 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) service delivers updated monthly [Cost & Usage Report (CUR2)](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html) up to three times a day to an [Amazon S3](https://aws.amazon.com/s3/) Bucket in your AWS Account (either in Management/Payer Account or a regular Linked Account). In us-east-1 region, the CloudFormation creates native resources; in other regions, CloudFormation uses AWS Lambda and Custom Resource to provision Data Exports in us-east-1. 35 | 36 | 2. [Amazon S3 replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html) rules copy Export data to a dedicated Data Collection Account automatically. This replication filters out all metadata and makes the file structure on the S3 bucket compatible with [Amazon Athena](https://aws.amazon.com/athena/) and [AWS Glue](https://aws.amazon.com/glue/) requirements. 37 | 38 | 3. A [Bucket Policy](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-policies.html) controls which accounts can replicate data to the destination bucket. 39 | 40 | 4. [AWS Glue Crawler](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#crawling-component) runs every midnight UTC to update the partitions of the table definition in [AWS Glue Data Catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#data-catalog-component). 41 | 42 | 5. [Amazon QuickSight](https://aws.amazon.com/quicksight/) pulls data from Amazon Athena to its SPICE (Super-fast, Parallel, In-memory Calculation Engine). 43 | 44 | 6. Updated QuickSight dashboards are available for the users. 45 | 46 | 7. When collecting data exports for Linked accounts (not for Management Accounts), you may also want to collect data exports for the Data Collection account itself. In this case, specify the Data Collection account as the first in the list of Source Accounts. Replication is still required to remove metadata. 47 | 48 | 8. Athena's reading process can be affected by writing operations. When replication arrives, it might fail to update datasets, especially with high volumes of data. In such cases, consider scheduling temporary disabling and re-enabling of the Amazon S3 bucket policy that allows replication. Since exports typically arrive up to three times a day, this temporary deactivation has minimal side effects and the updated data will be available with the next data delivery. 49 | 50 | 9. (Optional) Secondary bucket replication enables customers to archive data exports, consolidating data exports from multiple AWS Organisations or deploying staging environments (as described below ). 51 | 52 | ### Using Secondary Replication Bucket 53 | There can be various situations where customers need to replicate data exports to multiple destinations. 
One common scenario is a large enterprise with multiple business units, each with one or more AWS organisations. For this large enterprise, the Headquarters requires a consolidated view across all Business Units while individual Business Units still need visibility into their own data. 54 | 55 | To accomplish this, both the Headquarters and each Business Unit can implement separate data export destination stacks. Business Unit administrators, working from their management account, can specify a target bucket located within the Headquarters stack, enabling seamless data replication to both S3 buckets. 56 | 57 | Another scenario is replicating data to a staging environment for testing purposes. 58 | 59 | ![Secondary Replication Bucket](/.images/architecture-data-export-replication-to-secondary.png) 60 | 61 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) service delivers updated monthly [Cost & Usage Report (CUR2)](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html) up to three times a day to an [Amazon S3](https://aws.amazon.com/s3/) Bucket in the Business Unit AWS Account (either in Management/Payer Account or a regular Linked Account). In us-east-1 region, the CloudFormation creates native resources; in other regions, CloudFormation uses AWS Lambda and Custom Resource to provision Data Exports in us-east-1. 62 | 63 | 2. [Amazon S3 replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html) rules copy Export data to a dedicated Data Collection Account automatically. This replication filters out all metadata and makes the file structure on the S3 bucket compatible with [Amazon Athena](https://aws.amazon.com/athena/) and [AWS Glue](https://aws.amazon.com/glue/) requirements. 64 | 65 | 3. Using a Secondary Replication rule, the Export data is replicated from the Business Unit to the S3 bucket in the Headquarters AWS account. Each Business Unit should create its own Secondary Replication rule pointing to that bucket, which gives the Headquarters consolidated data from all Business Units. 66 | 67 | ## Legacy Cost and Usage Report 68 | Legacy AWS Cost and Usage Reports (Legacy CUR) can still be used for Cloud Intelligence Dashboards and other use cases. 69 | 70 | The CID project provides a CloudFormation template for Legacy CUR. Unlike the Data Exports CloudFormation template, it does not provide AWS Glue tables. You can use this template to replicate and aggregate CUR from multiple source accounts (Management or Linked). 71 | 72 | ![Basic Architecture of CUR](/.images/architecture-legacy-cur.png "Basic Architecture of CUR") 73 | 74 | 75 | Check code here: [cur-aggregation.yaml](deploy/cur-aggregation.yaml) 76 | 77 | ## FAQ 78 | 79 | ### Why replicate data instead of providing cross-account access? 80 | Cross-account access is possible but can be difficult to maintain, considering the many different roles that require this access, especially when dealing with multiple accounts. 81 | 82 | ### We only have one AWS Organization. Do we still need this? 83 | Yes. Throughout an organization's lifecycle, mergers and acquisitions may occur, so this approach prepares you for potential future scenarios. 84 | 85 | ### Can I use S3 Intelligent Tiering or S3 Infrequent Access (IA) for my CUR data connected to Athena? 
86 | We strongly recommend **against** using S3 IA for CUR data that is connected to Athena, especially if you have active FinOps users querying this data. Here's why: 87 | - CUDOS typically only retrieves data for the last 7 months, so theoretically older data could be moved to S3 IA or managed with Intelligent Tiering. 88 | - Moving older CUR parquet files to IA could potentially reduce storage costs by up to 45%. 89 | - **However**, this only saves money if the data isn't frequently accessed. With S3 IA, you're charged $0.01 per GB retrieved. 90 | - Athena uses multiple computational nodes in parallel, and complex queries can multiply data reads dramatically. For every 1GB of data you want to scan, Athena might perform up to 75GB of S3 reads. 91 | - If someone runs a query without properly limiting it to specific billing periods, the retrieval costs can be astronomical. For example: 92 | * Scanning a full CUR of 600GB: `600GB × 75 × $0.01/GB` = `$450.00` for just one query! 93 | - Due to this risk of human error, we do not use storage tiering as a default and strongly advise against it for CUR data connected to Athena. 94 | We also advise against Intelligent Tiering by default. 95 | - KPI Dashboard - one of our foundational dashboards - scans the entire CUR (Cost and Usage Report) data to detect the first snapshot and determine its age. This prevents AWS Intelligent Tiering from functioning effectively, as it forces all data to remain in frequent access tiers and results in unnecessary additional monitoring costs with no cost-saving benefit. 96 | -------------------------------------------------------------------------------- /data-exports/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release. 4 | 5 | CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 6 | files=("data-exports-aggregation" "cur-aggregation") 7 | for file in "${files[@]}"; do 8 | #Here data export stack and legacy cur aggregation have their own versions 9 | version=$(grep '^Description:' "data-exports/deploy/${file}.yaml" | grep -o '[0-9]\+\.[0-9]\+\.[0-9]\+') 10 | source_path="data-exports/deploy/${file}.yaml" 11 | echo $source_path 12 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/data-exports/$version/${file}.yaml" 13 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/data-exports/latest/${file}.yaml" 14 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/${file}.yaml" 15 | done 16 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | minversion = 6.0 3 | log_format = %(asctime)s [%(levelname)8s] %(message)s 4 | log_cli = true 5 | log_cli_level = INFO 6 | addopts = -s 7 | testpaths = 8 | test -------------------------------------------------------------------------------- /rls/.gitignore: -------------------------------------------------------------------------------- 1 | **/create_rls.zip 2 | **vars 3 | *tox.ini 4 | -------------------------------------------------------------------------------- /rls/README.md: -------------------------------------------------------------------------------- 1 | # RLS generator for QuickSight 2 | 3 | ## About QS RLS generator 4 | Generates an RLS CSV file for QuickSight based on AWS Organizational Units. 
5 | 6 | [About QuickSight RLS](https://docs.aws.amazon.com/quicksight/latest/user/restrict-access-to-a-data-set-using-row-level-security.html) 7 | [About AWS Organizational Units](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_introduction.html) 8 | 9 | 10 | ## Getting Started 11 | 12 | The code can be executed locally or as a Lambda function. [AWS Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) are managed in the standard way. 13 | To run the Lambda, define the following `ENV_VARS`; the DEFAULTS below are used when an ENV_VAR is not set. 14 | 15 | [Using AWS Lambda environment variables](https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html) 16 | 17 | 18 | List of Variables to preconfigure 19 | ``` 20 | OWNER_TAG = 'cid_users' 21 | BUCKET_NAME = 'NO DEFAULT' # Bucket where to upload the code 22 | QS_REGION = 'QS region' 23 | export MANAGEMENT_ACCOUNT_IDS='comma separated list of account_ids, format ACC_ID:REGION' 24 | export MANAGEMENTROLENAME=WA-Lambda-Assume-Role-Management-Account # Role to Assume in every payer/management account 25 | TMP_RLS_FILE = '/tmp/cid_rls.csv' 26 | ``` 27 | ## Defining TAGS 28 | 29 | 1) Tags at the root OU level give full access to all data and overwrite any other rules for the user at other levels. 30 | 2) Tags at the OU level are inherited by all children accounts. 31 | 3) Tags at the Account level generate rules for that account only. 32 | 33 | 34 | ## Output 35 | 36 | Output is written to the `TMP_RLS_FILE` location and uploaded to `BUCKET_NAME`. 37 | 38 | 39 | ## Example Output 40 | 41 | 42 | ``` 43 | UserName,account_id,payer_id 44 | vmindru@megacorp.corp,, 45 | vmindru_has_it_all,, 46 | Admin/vmindru-Isengard,, 47 | cross_ou_user,"0140000000,7200000,74700000,853000000", 48 | foo_inherit,74700000000, 49 | student1,"853000000,126000000", 50 | student2,"853678200000,126600000", 51 | other@company_foo.com,"363700000,1675000000", 52 | other@company.com,"36370000000,16750000000", 53 | vmindru@amazon.com,363000000000, 54 | ``` 55 | 56 | 57 | 58 | ## Create Lambda 59 | 60 | ### Create a new Lambda in the same region as your QS Dashboards 61 | 62 | 1) Create a new Lambda 63 | 2) Select Python 3.8 64 | 65 | ### Configure Lambda 66 | 67 | 1) Create and assign a new Execution Role: LambdaS3Org Role 68 | 2) Create and add the 2 Permission Policies below to the LambdaS3Org Role 69 | 70 | `LambdaOrgS3ListTags` 71 | 72 | ``` 73 | { 74 | "Version": "2012-10-17", 75 | "Statement": [ 76 | { 77 | "Sid": "VisualEditor0", 78 | "Effect": "Allow", 79 | "Action": [ 80 | "organizations:ListAccountsForParent", 81 | "organizations:ListAccounts", 82 | "organizations:ListTagsForResource", 83 | "organizations:ListOrganizationalUnitsForParent" 84 | ], 85 | "Resource": "*" 86 | } 87 | ] 88 | } 89 | ``` 90 | 91 | `AWSLambdaS3ExecutionRole` 92 | 93 | ``` 94 | { 95 | "Version": "2012-10-17", 96 | "Statement": [ 97 | { 98 | "Sid": "VisualEditor0", 99 | "Effect": "Allow", 100 | "Action": "s3:GetObject", 101 | "Resource": "arn:aws:s3:::*" 102 | }, 103 | { 104 | "Sid": "VisualEditor1", 105 | "Effect": "Allow", 106 | "Action": "s3:PutObject", 107 | "Resource": "arn:aws:s3:::vmindru-cid-fr/cid_rls.csv" 108 | } 109 | ] 110 | } 111 | ``` 112 | 113 | ### Add ENV Variables 114 | 115 | Go to function settings and add ENV VARS 116 | 117 | `BUCKET_NAME` - Bucket to which the RLS file is uploaded 118 | `ROOT_OU` - ID of your root OU 119 | 120 | ### Increase execution time to 120s 121 | 122 | 123 | 124 | 125 | --------------------------------------------------------------------------------
/rls/deploy/deploy_cid_rls.yaml: -------------------------------------------------------------------------------- 1 | #https://github.com/awslabs/cid-data-collection-framework/blob/main/rls/deploy/deploy_cid_rls.yaml 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: Lambda to collect AWS Organization and Amazon QuickSight data and store in S3 for RLS implementation v0.2.0 - AWS Solution SO9011 4 | Parameters: 5 | DestinationBucket: 6 | Type: String 7 | Description: Name of the S3 Bucket that is created to hold org data 8 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 9 | ManagementAccountRole: 10 | Type: String 11 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 12 | Default: Lambda-Assume-Role-Management-Account 13 | ResourcePrefix: 14 | Type: String 15 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 16 | Default: CID-DC- 17 | ManagementAccountID: 18 | Type: String 19 | AllowedPattern: ([a-z0-9\-, ]*?$) 20 | Description: "(Ex: 123456789,098654321,789054312) List of Payer IDs you wish to collect data for. Can just be one Accounts" 21 | Schedule: 22 | Type: String 23 | Description: Cron job to trigger the lambda using cloudwatch event 24 | Default: "rate(1 hour)" 25 | CodeBucket: 26 | Type: String 27 | Description: S3 Bucket with RLS code,this coverts to CodeBucket-Region e.g. for us-east-1 this will be aws-managed-cost-intelligence-dashboards-us-east-1 28 | Default: aws-managed-cost-intelligence-dashboards 29 | CodeKey: 30 | Type: String 31 | Description: file name of ZipFile with data code 32 | Default: cfn/rls/create_rls.zip # RLS Folder to be updated, once the LAB will be created 33 | Outputs: 34 | LambdaFunctionName: 35 | Value: 36 | Ref: CIDRLS 37 | LambdaFunctionARN: 38 | Description: Lambda function ARN. 39 | Value: 40 | Fn::GetAtt: 41 | - CIDRLS 42 | - Arn 43 | Resources: 44 | CIDRLS: 45 | Type: AWS::Lambda::Function 46 | Properties: 47 | FunctionName: !Sub 48 | - 'CIDRLS_${Id}' 49 | - Id: !Select [0, !Split ['-', !Ref 'AWS::StackName']] 50 | Description: LambdaFunction of python3.8. 
51 | Runtime: python3.9 52 | Code: 53 | S3Bucket: !Sub '${CodeBucket}-${AWS::Region}' 54 | S3Key: !Ref CodeKey 55 | Handler: 'create_rls.lambda_handler' 56 | MemorySize: 2688 57 | Timeout: 600 58 | Role: !GetAtt LambdaRole.Arn 59 | Environment: 60 | Variables: 61 | BUCKET_NAME: !Ref DestinationBucket 62 | MANAGEMENTROLENAME: !Sub "${ResourcePrefix}${ManagementAccountRole}" 63 | MANAGEMENT_ACCOUNT_IDS: !Ref ManagementAccountID 64 | QS_REGION: !Ref AWS::Region 65 | LambdaRole: 66 | Type: AWS::IAM::Role 67 | Properties: 68 | RoleName: !Sub "${ResourcePrefix}RLS-LambdaRole" 69 | AssumeRolePolicyDocument: 70 | Statement: 71 | - Action: 72 | - sts:AssumeRole 73 | Effect: Allow 74 | Principal: 75 | Service: 76 | - lambda.amazonaws.com 77 | Version: 2012-10-17 78 | ManagedPolicyArns: 79 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 80 | Path: / 81 | Policies: 82 | - PolicyName: "Assume-Management-Organization-Data-Role" 83 | PolicyDocument: 84 | Version: "2012-10-17" 85 | Statement: 86 | - Effect: "Allow" 87 | Action: "sts:AssumeRole" 88 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${ResourcePrefix}${ManagementAccountRole}" # Need to assume a Read role in management accounts 89 | - PolicyName: "Logs" 90 | PolicyDocument: 91 | Version: "2012-10-17" 92 | Statement: 93 | - Effect: "Allow" 94 | Action: 95 | - "logs:CreateLogGroup" 96 | - "logs:CreateLogStream" 97 | - "logs:PutLogEvents" 98 | - "logs:DescribeLogStreams" 99 | Resource: !Sub "arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/CID-RLS*" 100 | - Effect: "Allow" 101 | Action: 102 | - "s3:PutObject" 103 | - "s3:ListBucket" 104 | Resource: 105 | - !Sub "arn:${AWS::Partition}:s3:::${DestinationBucket}" 106 | - !Sub "arn:${AWS::Partition}:s3:::${DestinationBucket}/*" 107 | - Effect: "Allow" 108 | Action: 109 | - "quicksight:ListUsers" 110 | Resource: "*" # Cannot restrict this 111 | CloudWatchTrigger: 112 | Type: AWS::Events::Rule 113 | Properties: 114 | Description: Scheduler 115 | Name: !Sub 116 | - 'Scheduler_ForCIDRLS_${Id}' 117 | - Id: !Select [0, !Split ['-', !Ref 'AWS::StackName']] 118 | ScheduleExpression: !Ref Schedule 119 | State: ENABLED 120 | Targets: 121 | - Arn: !GetAtt CIDRLS.Arn 122 | Id: TriggerForCIDRLS 123 | EventPermission: 124 | Type: AWS::Lambda::Permission 125 | Properties: 126 | FunctionName: !GetAtt CIDRLS.Arn 127 | Action: lambda:InvokeFunction 128 | Principal: events.amazonaws.com 129 | SourceAccount: !Ref 'AWS::AccountId' 130 | SourceArn: !GetAtt CloudWatchTrigger.Arn 131 | LambdaAnalyticsRole: #Execution role for the custom resource 132 | Type: AWS::IAM::Role 133 | Properties: 134 | Path: 135 | Fn::Sub: /${ResourcePrefix}/ 136 | AssumeRolePolicyDocument: 137 | Version: 2012-10-17 138 | Statement: 139 | - Effect: Allow 140 | Principal: 141 | Service: 142 | - lambda.amazonaws.com 143 | Action: 144 | - sts:AssumeRole 145 | ManagedPolicyArns: 146 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 147 | LambdaAnalytics: 148 | Type: AWS::Lambda::Function 149 | Properties: 150 | Runtime: python3.9 151 | FunctionName: !Sub ${ResourcePrefix}RLS-Analytics 152 | Handler: index.lambda_handler 153 | MemorySize: 128 154 | Role: !GetAtt LambdaAnalyticsRole.Arn 155 | Timeout: 15 156 | Environment: 157 | Variables: 158 | WA_ANALYTICS_ENDPOINT: https://okakvoavfg.execute-api.eu-west-1.amazonaws.com/ 159 | Code: 160 | ZipFile: | 161 | import os 162 | import json 163 | import uuid 164 | import urllib3 165 | import boto3 166 | 
endpoint = os.environ['WA_ANALYTICS_ENDPOINT'] 167 | account_id = boto3.client("sts").get_caller_identity()["Account"] 168 | def lambda_handler(event, context): 169 | print(json.dumps(event)) 170 | try: 171 | if event['RequestType'].upper() not in ['CREATE', 'UPDATE', 'UPDATE']: 172 | raise Exception(f"Unknown RequestType {event['RequestType']}") 173 | action = event['RequestType'].upper() 174 | method = {'CREATE':'PUT', 'UPDATE': 'PATCH', 'DELETE': 'DELETE'}.get(action) 175 | via_key = {'CREATE':'created_via', 'UPDATE': 'updated_via', 'DELETE': 'deleted_via'}.get(action) 176 | payload = {'dashboard_id': 'cid/rls-org', 'account_id': account_id, via_key: 'CFN'} 177 | r = urllib3.PoolManager().request(method, endpoint, body=json.dumps(payload).encode('utf-8'), headers={'Content-Type': 'application/json'}) 178 | if r.status != 200: 179 | raise Exception(f"There has been an issue logging action, server did not respond with a 200 response, actual status: {r.status}, response data {r.data.decode('utf-8')}. This issue will be ignored") 180 | res, reason = 'SUCCESS', 'success' 181 | except Exception as exc: 182 | res, reason = 'SUCCESS', f"{exc} . This issue will be ignored" 183 | body = { 184 | 'Status': res, 185 | 'Reason': reason, 186 | 'PhysicalResourceId': event.get('PhysicalResourceId', str(uuid.uuid1())), 187 | 'StackId': event.get('StackId'), 188 | 'RequestId': event.get('RequestId'), 189 | 'LogicalResourceId': event.get('LogicalResourceId'), 190 | 'NoEcho': False, 191 | 'Data': {'Reason': reason}, 192 | } 193 | json_body=json.dumps(body) 194 | print(json_body) 195 | url = event.get('ResponseURL') 196 | if not url: return 197 | try: 198 | response = urllib3.PoolManager().request('PUT', url, body=json_body, headers={'content-type' : '', 'content-length' : str(len(json_body))}, retries=False) 199 | print(f"Status code: {response}") 200 | except Exception as exc: 201 | print("Failed sending PUT to CFN: " + str(exc)) 202 | LambdaAnalyticsExecutor: 203 | Type: Custom::LambdaAnalyticsExecutor 204 | Properties: 205 | ServiceToken: !GetAtt LambdaAnalytics.Arn 206 | -------------------------------------------------------------------------------- /rls/utils/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script builds a zip to be uploaded 4 | 5 | code_path=$(git rev-parse --show-toplevel)/rls/deploy 6 | 7 | rm $code_path/create_rls.zip 8 | zip -j $code_path/create_rls.zip $code_path/create_rls.py 9 | echo 'Done build' 10 | -------------------------------------------------------------------------------- /rls/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import git 3 | import json 4 | 5 | repo = git.Repo('.') 6 | 7 | print(repo.git.execute('git checkout main'.split())) 8 | print(repo.git.execute('git pull'.split())) 9 | module = 'rls' 10 | 11 | 12 | old_ver = json.load(open(f"{module}/utils/version.json"))['version'] 13 | 14 | print(old_ver) 15 | bump = 'patch' 16 | if len(sys.argv) > 1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump == 'patch': 22 | new_ver = '.'.join(map(str, [maj, minor, patch + 1])) 23 | elif bump == 'minor': 24 | new_ver = '.'.join(map(str, [maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = 
open(f"{module}/utils/version.json").read() 32 | with open(f"{module}/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver, new_ver)) 34 | 35 | 36 | filenames = [ 37 | f"{module}/deploy/deploy-{module}.yaml", 38 | f"{module}/utils/version.json", 39 | ] 40 | for filename in filenames: 41 | tx = open(filename).read() 42 | with open(filename, "w") as f: 43 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 44 | 45 | 46 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 47 | 48 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 49 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 50 | -------------------------------------------------------------------------------- /rls/utils/qs_s3_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "fileLocations": [ 3 | { 4 | "URIs": [ 5 | "s3:///cid_rls/cid_rls.csv" 6 | ] 7 | } 8 | ], 9 | "globalUploadSettings": { 10 | "format": "CSV", 11 | "delimiter": ",", 12 | "textqualifier": "\"", 13 | "containsHeader": "true" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /rls/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export AWS_REGION=us-east-1 6 | export STACK_SET_NAME=LayerBuckets 7 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 8 | 9 | code_path=$(git rev-parse --show-toplevel)/rls/deploy 10 | 11 | echo 'building lambda zip' 12 | "$(git rev-parse --show-toplevel)/rls/utils/build.sh" 13 | 14 | echo "sync to central bucket" 15 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/rls/ 16 | 17 | 18 | 19 | aws cloudformation list-stack-instances \ 20 | --stack-set-name $STACK_SET_NAME \ 21 | --query 'Summaries[].[StackId,Region]' \ 22 | --output text | 23 | while read stack_id region; do 24 | echo "sync to $region" 25 | bucket=$(aws cloudformation list-stack-resources --stack-name $stack_id \ 26 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 27 | --region $region --output text) 28 | 29 | aws s3 sync $code_path/ s3://$bucket/cfn/rls/ --delete 30 | done 31 | 32 | echo 'Done' 33 | -------------------------------------------------------------------------------- /rls/utils/tagger/aws_org_tagger_lambda.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import csv 3 | import logging 4 | 5 | 6 | def read_csv_function(filename): 7 | 8 | file = open(f"{filename}", "r", encoding='utf-8-sig') 9 | data = list(csv.DictReader(file, delimiter=",")) 10 | file.close() 11 | 12 | return data 13 | 14 | def org_function(key, value, account_id): 15 | 16 | client = boto3.client('organizations') 17 | response = client.list_tags_for_resource( 18 | ResourceId=account_id 19 | ) 20 | response = client.tag_resource( 21 | ResourceId=account_id, 22 | Tags=[ 23 | { 24 | 'Key': key, 25 | 'Value': value 26 | }, 27 | ] 28 | ) 29 | return response 30 | 31 | def main(): 32 | filename = 'data.csv' 33 | key = 'cid_users' 34 | map_data = read_csv_function(filename) 35 | 36 | for line in map_data: 37 | try: 38 | account_id = line['Account ID'] 39 | value = line['cid_users'] 40 | org_function(key, value, account_id) 41 | except Exception as e: 42 | logging.info("%s" % e) 43 | pass 44 | 45 | def lambda_handler(event, 
context): 46 | main() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | 51 | 52 | -------------------------------------------------------------------------------- /rls/utils/tagger/data.csv: -------------------------------------------------------------------------------- 1 | Account ID,cid_users 2 | 111122223333,exmaple@email.com 3 | 444455556666,exmaple@email.com:example2@email.com 4 | ou-111112222,exmaple@email.com -------------------------------------------------------------------------------- /rls/utils/tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length=160 3 | 4 | -------------------------------------------------------------------------------- /rls/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.1" 3 | } 4 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please check [CONTRIBUTING GUIDE](https://github.com/awslabs/cid-framework/blob/main/data-collection/CONTRIBUTING.md). 3 | -------------------------------------------------------------------------------- /test/cleanup.py: -------------------------------------------------------------------------------- 1 | ''' cleanup test environment 2 | ''' 3 | import logging 4 | 5 | import boto3 6 | 7 | from utils import cleanup_stacks, PREFIX 8 | 9 | if __name__ == '__main__': 10 | logging.basicConfig(level=logging.INFO) 11 | account_id = boto3.client("sts").get_caller_identity()["Account"] 12 | cloudformation = boto3.client('cloudformation') 13 | 14 | # Sometimes cloud formation deletes a role needed for management of stacksets. For these cases we can create just this role. If it exists stack will fail, but it is ok. 
15 | try: 16 | cloudformation.delete_stack(StackName='TempDebugCIDStackSets') 17 | cloudformation.create_stack( 18 | TemplateBody=open('data-collection/test/debugstackets.yml').read(), 19 | StackName='TempDebugCIDStackSets', 20 | Parameters=[ 21 | {'ParameterKey': 'AdministratorAccountId', 'ParameterValue': account_id} 22 | ], 23 | Capabilities=['CAPABILITY_NAMED_IAM'], 24 | ) 25 | except Exception as exc: 26 | print(exc) 27 | 28 | cleanup_stacks( 29 | cloudformation=boto3.client('cloudformation'), 30 | account_id=account_id, 31 | s3=boto3.resource('s3'), 32 | s3client=boto3.client('s3'), 33 | athena=boto3.client('athena'), 34 | glue=boto3.client('glue'), 35 | ) 36 | 37 | cloudformation.delete_stack(StackName='TempDebugCIDStackSets') 38 | logging.info('Cleanup Done') 39 | 40 | # delete all log groups 41 | logs = boto3.client('logs') 42 | for log_group in logs.get_paginator('describe_log_groups').paginate(logGroupNamePrefix=f'/aws/lambda/{PREFIX}').search('logGroups'): 43 | logs.delete_log_group(logGroupName=log_group['logGroupName']) 44 | print(f"deleted {log_group['logGroupName']}") 45 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import os 4 | 5 | 6 | import boto3 7 | import pytest 8 | 9 | from utils import prepare_stacks, cleanup_stacks 10 | 11 | logger = logging.getLogger(__name__) 12 | _start_time = None 13 | 14 | 15 | @pytest.fixture(scope='session') 16 | def session(): 17 | return boto3.session.Session() 18 | 19 | @pytest.fixture(scope='session') 20 | def athena(): 21 | return boto3.client('athena') 22 | 23 | @pytest.fixture(scope='session') 24 | def lambda_client(): 25 | return boto3.client('lambda') 26 | 27 | @pytest.fixture(scope='session') 28 | def support(): 29 | return boto3.client('support') 30 | 31 | @pytest.fixture(scope='session') 32 | def cloudformation(): 33 | return boto3.client('cloudformation') 34 | 35 | 36 | @pytest.fixture(scope='session') 37 | def s3(): 38 | return boto3.resource('s3') 39 | 40 | @pytest.fixture(scope='session') 41 | def s3client(): 42 | return boto3.client('s3') 43 | 44 | @pytest.fixture(scope='session') 45 | def compute_optimizer(): 46 | return boto3.client('compute-optimizer') 47 | 48 | 49 | @pytest.fixture(scope='session') 50 | def account_id(): 51 | return boto3.client("sts").get_caller_identity()["Account"] 52 | 53 | @pytest.fixture(scope='session') 54 | def org_unit_id(): 55 | return boto3.client("organizations").list_roots()["Roots"][0]["Id"] 56 | 57 | @pytest.fixture(scope='session') 58 | def org_unit_id(): 59 | return boto3.client("organizations").list_roots()["Roots"][0]["Id"] 60 | 61 | @pytest.fixture(scope='session') 62 | def glue(): 63 | return boto3.client("glue") 64 | 65 | @pytest.fixture(scope='session') 66 | def bucket(): 67 | bucket_name = os.environ.get('bucket') 68 | if bucket_name: 69 | return bucket_name 70 | print('env var `bucket` not found') 71 | default_bucket = f'cid-{account_id()}-test' 72 | s3 = boto3.client('s3') 73 | try: 74 | s3.head_bucket(Bucket=default_bucket) 75 | return default_bucket 76 | except s3.exceptions.ClientError as exc: 77 | print(f'bucket {default_bucket} not found in the account. {exc}') 78 | raise AssertionError( 79 | 'You need a bucket to run the tests. 
Please set bucket env variable ' 80 | '`export bucket=existing-bucket` or create a default bucket ' 81 | f'`aws s3api create-bucket --bucket {default_bucket}`' 82 | ) 83 | 84 | 85 | @pytest.fixture(scope='session') 86 | def start_time(): 87 | global _start_time 88 | if _start_time is None: 89 | _start_time = datetime.now() 90 | 91 | return _start_time 92 | 93 | def pytest_addoption(parser): 94 | parser.addoption("--mode", action="store", default="normal", choices=("normal", "no-teardown") ) 95 | 96 | @pytest.fixture(scope='session') 97 | def mode(request): 98 | return request.config.getoption("--mode") 99 | 100 | @pytest.fixture(scope='session', autouse=True) 101 | def prepare_setup(athena, cloudformation, s3, s3client, account_id, org_unit_id, bucket, start_time, mode, glue): 102 | yield prepare_stacks(cloudformation=cloudformation, account_id=account_id, org_unit_id=org_unit_id, bucket=bucket, s3=s3, s3client=s3client) 103 | 104 | mode = pytest.params.get('mode', mode) 105 | if mode != "no-teardown": 106 | cleanup_stacks(cloudformation=cloudformation, account_id=account_id, s3=s3, s3client=s3client, athena=athena, glue=glue) -------------------------------------------------------------------------------- /test/debugstackets.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: Configure the StackSetAdminRole to enable use of AWS CloudFormation StackSets. 3 | 4 | Parameters: 5 | AdministratorAccountId: 6 | Type: String 7 | Description: AWS Account Id of the administrator account (the account in which StackSets will be created). 8 | MaxLength: 12 9 | MinLength: 12 10 | 11 | Resources: 12 | ExecutionRole: 13 | Type: AWS::IAM::Role 14 | Properties: 15 | RoleName: CID-DC-ComputeOptimizer-StackSetExecutionRole 16 | AssumeRolePolicyDocument: 17 | Version: 2012-10-17 18 | Statement: 19 | - Effect: Allow 20 | Principal: 21 | AWS: 22 | - !Ref AdministratorAccountId 23 | Action: 24 | - sts:AssumeRole 25 | Path: / 26 | ManagedPolicyArns: 27 | - !Sub arn:${AWS::Partition}:iam::aws:policy/AdministratorAccess 28 | 29 | AdministrationRole: 30 | Type: AWS::IAM::Role 31 | Properties: 32 | RoleName: CID-DC-StackSetAdminRole 33 | AssumeRolePolicyDocument: 34 | Version: 2012-10-17 35 | Statement: 36 | - Effect: Allow 37 | Principal: 38 | Service: cloudformation.amazonaws.com 39 | Action: 40 | - sts:AssumeRole 41 | Path: / 42 | Policies: 43 | - PolicyName: AssumeRole-CID-DC-ComputeOptimizer-StackSetExecutionRole 44 | PolicyDocument: 45 | Version: 2012-10-17 46 | Statement: 47 | - Effect: Allow 48 | Action: 49 | - sts:AssumeRole 50 | Resource: 51 | - "arn:*:iam::*:role/CID-DC-ComputeOptimizer-StackSetExecutionRole" -------------------------------------------------------------------------------- /test/run-test-from-scratch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # see ../CONTRIBUTION.md 3 | 4 | # vars 5 | account_id=$(aws sts get-caller-identity --query "Account" --output text ) 6 | bucket=cid-$account_id-test 7 | export bucket 8 | 9 | # upload files 10 | ./data-collection/utils/upload.sh "$bucket" 11 | 12 | # run test 13 | python3 ./test/test_from_scratch.py "$@" -------------------------------------------------------------------------------- /utils/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086,SC2181 3 | # This script runs cfn-lint cfn_nag_scan and checkov for all 
templates in folder 4 | 5 | RED='\033[0;31m' 6 | GREEN='\033[0;32m' 7 | YELLOW='\033[0;33m' 8 | NC='\033[0m' # No Color 9 | 10 | folder=$(git rev-parse --show-toplevel) 11 | success_count=0 12 | failure_count=0 13 | 14 | # CKV_AWS_18 - Ensure AWS access logging is enabled on S3 buckets - Public is not publically shared, and access is limited to QS and account Admins thus logging is not required. Also avoid additional costs. 15 | # CKV_AWS_116 - Ensure the S3 bucket has logging enabled - Public is not publically shared, and access is limited to QS and account Admins thus logging is not required. Also avoid additional costs. 16 | # CKV_AWS_117 - Ensure AWS Lambda function is configured inside a VPC - Not requied for Lambda functionality as only AWS API calls are used. 17 | # CKV_AWS_173 - Check encryption settings for Lambda environmental variable - No sensitive parameters in environmental variables 18 | # CKV_AWS_195 - Ensure Glue component has a security configuration associated - AWS managed encryption is used for s3. 19 | # CKV_SECRET_6 - Base64 High Entropy String - Remove false positives 20 | # CKV_AWS_115 - Ensure that AWS Lambda function is configured for function-level concurrent execution limit - No need for concurency reservation 21 | # CKV_AWS_158 - Ensure that CloudWatch Log Group is encrypted by KMS - No need as there no sesible information in the logs 22 | checkov_skip=CKV_AWS_18,CKV_AWS_117,CKV_AWS_116,CKV_AWS_173,CKV_AWS_195,CKV_SECRET_6,CKV_AWS_115,CKV_AWS_158 23 | 24 | export exclude_files=("module-inventory.yaml" "module-pricing.yaml" "module-backup.yaml") # For::Each breaks lint :'( 25 | 26 | yaml_files=$(find "$folder" -type f -name "*.yaml" -exec ls -1t "{}" +;) # ordered by date 27 | 28 | for file in $yaml_files; do 29 | echo "Linting $(basename $file)" 30 | fail=0 31 | 32 | # checkov 33 | output=$(eval checkov --skip-download --skip-check $checkov_skip --quiet -f "$file") 34 | if [ $? -ne 0 ]; then 35 | echo "$output" | awk '{ print "\t" $0 }' 36 | echo -e "checkov ${RED}KO${NC}" | awk '{ print "\t" $0 }' 37 | fail=1 38 | else 39 | echo -e "checkov ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 40 | fi 41 | 42 | # cfn-lint 43 | output=$(eval cfn-lint -- "$file") 44 | if [ $? -ne 0 ]; then 45 | echo "$output" | awk '{ print "\t" $0 }' 46 | echo -e "cfn-lint ${RED}KO${NC}" | awk '{ print "\t" $0 }' 47 | fail=1 48 | else 49 | echo -e "cfn-lint ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 50 | fi 51 | 52 | if [ "$(basename $file)" == "${exclude_files[0]}" ] || [ "$(basename $file)" == "${exclude_files[1]}" ] || [ "$(basename $file)" == "${exclude_files[2]}" ]; then 53 | echo -e "cfn_nag_scan ${YELLOW}SKIP${NC} For::Each breaks cfn_nag" | awk '{ print "\t" $0 }' 54 | continue 55 | fi 56 | 57 | 58 | # cfn_nag_scan 59 | output=$(eval cfn_nag_scan --input-path "$file") 60 | if [ $? 
-ne 0 ]; then 61 | echo "$output" | awk '{ print "\t" $0 }' 62 | echo -e "cfn_nag_scan ${RED}KO${NC}" | awk '{ print "\t" $0 }' 63 | fail=1 64 | else 65 | echo -e "cfn_nag_scan ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 66 | fi 67 | 68 | if [ $fail -ne 0 ]; then 69 | ((failure_count++)) 70 | else 71 | ((success_count++)) 72 | fi 73 | done 74 | 75 | echo "Successful lints: $success_count" 76 | echo "Failed lints: $failure_count" 77 | if [ $failure_count -ne 0 ]; then 78 | exit 1 79 | else 80 | exit 0 81 | fi 82 | -------------------------------------------------------------------------------- /utils/pylint.py: -------------------------------------------------------------------------------- 1 | """ This script shows pylint for all Lambda Functions with ZipFile code in yaml 2 | 3 | """ 4 | import os 5 | import glob 6 | import subprocess 7 | 8 | import cfn_tools # pip install cfn-flip 9 | 10 | 11 | FOLDER_PATH = 'data-collection/deploy/' 12 | TMP_DIR = '.tmp' 13 | PYLINT_DISABLE = [ 14 | 'C0301', # Line too long 15 | 'C0103', # Invalid name of module 16 | 'C0114', # Missing module docstring 17 | 'C0116', # Missing function or method docstring 18 | 'W1203', # Use lazy % formatting in logging functions (logging-fstring-interpolation) 19 | 'W1201', # Use lazy % formatting in logging functions (logging-not-lazy) 20 | ] 21 | BANDIT_SKIP = [ 22 | 'B101', # Assert 23 | 'B108', # Hardcoded_tmp_directory 24 | ] 25 | 26 | def pylint(filename): 27 | """ call pylint """ 28 | try: 29 | res = subprocess.check_output( 30 | f'pylint {filename} --disable {",".join(PYLINT_DISABLE)}'.split(), 31 | stderr=subprocess.PIPE, 32 | universal_newlines=True, 33 | ) 34 | return res 35 | except subprocess.CalledProcessError as exc: 36 | return exc.stdout 37 | 38 | def bandit(filename): 39 | """ call bandit """ 40 | try: 41 | res = subprocess.check_output( 42 | f'bandit {filename} --skip {",".join(BANDIT_SKIP)}'.split(), 43 | stderr=subprocess.PIPE, 44 | universal_newlines=True, 45 | ) 46 | if 'No issues identified.' in str(res): 47 | return 'Bandit: No issues identified.' # skip verbose 48 | return res 49 | except subprocess.CalledProcessError as exc: 50 | return exc.stdout 51 | 52 | def tab(text, indent="\t"): 53 | """ returns text with a tab """ 54 | return '\n'.join([indent + line for line in text.splitlines()]) 55 | 56 | def main(): 57 | """ run pylint for all lambda functions """ 58 | file_list = glob.glob(os.path.join(FOLDER_PATH, "*.yaml")) 59 | file_list.sort(key=os.path.getmtime, reverse=True) 60 | for filename in file_list: 61 | try: 62 | with open(filename, encoding='utf-8') as template_file: 63 | template = cfn_tools.load_yaml(template_file.read()) 64 | except Exception: 65 | print(f'failed to load {filename}') 66 | continue 67 | for name, res in template['Resources'].items(): 68 | if isinstance(res, dict) and res['Type'] == 'AWS::Lambda::Function': 69 | code = res.get('Properties', {}).get('Code', {}).get('ZipFile') 70 | if not code: 71 | continue 72 | code_dir = TMP_DIR + '/' + os.path.basename(filename).rsplit('.', 1)[0] + "/" + name + '/' 73 | os.makedirs(code_dir, exist_ok=True) 74 | 75 | py_fn = code_dir + '/code.py' 76 | with open(py_fn, 'w', encoding='utf-8') as py_f: 77 | py_f.write(code) 78 | print(filename, name) 79 | print(tab(pylint(py_fn))) 80 | print(tab(bandit(py_fn))) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | --------------------------------------------------------------------------------