├── .github └── workflows │ ├── python.yml │ ├── secrets-scan.yml │ ├── security-scan.yml │ └── shellcheck-scan.yml ├── .gitignore ├── .images ├── architecture-data-collection-compute-optimizer.png ├── architecture-data-collection-deploy.png ├── architecture-data-collection-detailed.png ├── architecture-data-collection.png ├── architecture-data-export-replication-to-secondary.png ├── architecture-data-exports-advanced.png ├── architecture-data-exports.png ├── architecture-legacy-cur.png ├── deployment-guide-button.svg └── documentation.svg ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── SECURITY.md ├── case-summarization ├── GUARDRAIL.md ├── README.md ├── deploy │ ├── case-summarization.yaml │ └── guardrail.yaml ├── images │ └── archi.png ├── layer │ ├── build-layer.sh │ ├── publish-lambda-layer.sh │ └── requirements.txt └── utils │ ├── bump-release.py │ ├── release.sh │ ├── upload.sh │ └── version.json ├── data-collection ├── CHANGELOG.md ├── CONTRIBUTING.md ├── README.md ├── deploy │ ├── account-collector.yaml │ ├── data │ │ └── rds_graviton_mapping.csv │ ├── deploy-data-collection.yaml │ ├── deploy-data-read-permissions.yaml │ ├── deploy-in-linked-account.yaml │ ├── deploy-in-management-account.yaml │ ├── module-aws-feeds.yaml │ ├── module-backup.yaml │ ├── module-budgets.yaml │ ├── module-compute-optimizer.yaml │ ├── module-cost-anomaly.yaml │ ├── module-cost-explorer-rightsizing.yaml │ ├── module-ecs-chargeback.yaml │ ├── module-health-events.yaml │ ├── module-inventory.yaml │ ├── module-isv-feeds.yaml │ ├── module-license-manager.yaml │ ├── module-organization.yaml │ ├── module-pricing.yaml │ ├── module-quicksight.yaml │ ├── module-rds-usage.yaml │ ├── module-service-quotas.yaml │ ├── module-support-cases.yaml │ ├── module-transit-gateway.yaml │ ├── module-trusted-advisor.yaml │ ├── module-workspaces-metrics.yaml │ └── source │ │ ├── ecs │ │ └── Athena │ │ │ ├── bu_usage_view.sql │ │ │ ├── cluster_metadata_view.sql │ │ │ ├── ec2_cluster_costs_view.sql │ │ │ └── ecs_chargeback_report.sql │ │ ├── partition_repair_util.py │ │ ├── regions.csv │ │ ├── s3_backwards_comp.py │ │ ├── s3_files_migration.py │ │ └── step-functions │ │ ├── awsfeeds-state-machine-v1.json │ │ ├── crawler-state-machine.json │ │ ├── health-detail-state-machine.json │ │ ├── main-state-machine-v2.json │ │ ├── main-state-machine-v3.json │ │ ├── main-state-machine.json │ │ └── standalone-state-machine.json └── utils │ ├── bump-release.py │ ├── release.sh │ ├── upload.sh │ └── version.json ├── data-exports ├── README.md ├── deploy │ ├── cur-aggregation.yaml │ └── data-exports-aggregation.yaml └── utils │ └── release.sh ├── pytest.ini ├── rls ├── .gitignore ├── README.md ├── deploy │ ├── create_rls.py │ └── deploy_cid_rls.yaml └── utils │ ├── build.sh │ ├── bump-release.py │ ├── qs_s3_manifest.json │ ├── release.sh │ ├── tagger │ ├── aws_org_tagger_lambda.py │ └── data.csv │ ├── tox.ini │ └── version.json ├── security-hub └── deploy │ └── module-securityhub.yaml ├── test ├── README.md ├── cleanup.py ├── conftest.py ├── debugstackets.yml ├── run-test-from-scratch.sh ├── test_from_scratch.py └── utils.py └── utils ├── lint.sh └── pylint.py /.github/workflows/python.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Python Quality Check 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | pylint-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - 
name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | - name: Install packages 21 | run: | 22 | pip install cfn-flip pylint urllib3 boto3 bandit 23 | - name: Pylint all 24 | run: | 25 | python utils/pylint.py 26 | -------------------------------------------------------------------------------- /.github/workflows/secrets-scan.yml: -------------------------------------------------------------------------------- 1 | name: TruffleHog Secrets Scan 2 | on: [pull_request] 3 | jobs: 4 | TruffleHog: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout code 8 | uses: actions/checkout@v3 9 | with: 10 | fetch-depth: 0 11 | - name: TruffleHog OSS 12 | uses: trufflesecurity/trufflehog@main 13 | with: 14 | path: ./ 15 | base: ${{ github.event.repository.default_branch }} 16 | head: HEAD 17 | extra_args: --debug --only-verified -------------------------------------------------------------------------------- /.github/workflows/security-scan.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Security Scan 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | cfn-lint-cfn-nag-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | - name: Set up Ruby 21 | uses: ruby/setup-ruby@v1 22 | with: 23 | ruby-version: '3.1' 24 | - name: Install CFN tools 25 | run: | 26 | gem install cfn-nag 27 | - name: Install cfn-lint 28 | run: | 29 | pip install cfn-lint checkov 30 | pip install --no-cache-dir packaging cyclonedx-python-lib=='5.2.0' #https://github.com/bridgecrewio/checkov/issues/5841 31 | - name: Scan all templates 32 | run: | 33 | utils/lint.sh 34 | -------------------------------------------------------------------------------- /.github/workflows/shellcheck-scan.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Shellcheck Scan 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | 11 | shellcheck-scan: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Git clone the repository 15 | uses: actions/checkout@v3 16 | - name: Scan all bash 17 | run: | 18 | find ./ -type f -name "*.sh" -exec shellcheck {} + 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .DS_Store 163 | .vscode 164 | 165 | # Packaged modules 166 | fof.zip 167 | ecs.zip 168 | ta.zip 169 | 170 | # Local sandbox 171 | sandbox/ 172 | scratch/ 173 | migration_log.csv 174 | # Pylint custom script temporal folder 175 | .tmp 176 | 177 | data-collection/lambda-layers/python/ 178 | data-collection/lambda-layers/layer.zip 179 | data-collection/test/clean-html.py 180 | -------------------------------------------------------------------------------- /.images/architecture-data-collection-compute-optimizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-compute-optimizer.png -------------------------------------------------------------------------------- /.images/architecture-data-collection-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-deploy.png -------------------------------------------------------------------------------- /.images/architecture-data-collection-detailed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection-detailed.png -------------------------------------------------------------------------------- /.images/architecture-data-collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-collection.png -------------------------------------------------------------------------------- /.images/architecture-data-export-replication-to-secondary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-export-replication-to-secondary.png -------------------------------------------------------------------------------- /.images/architecture-data-exports-advanced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-exports-advanced.png -------------------------------------------------------------------------------- /.images/architecture-data-exports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-data-exports.png -------------------------------------------------------------------------------- /.images/architecture-legacy-cur.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/.images/architecture-legacy-cur.png -------------------------------------------------------------------------------- /.images/deployment-guide-button.svg: -------------------------------------------------------------------------------- 1 | 2 | Deployment Guide 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Deployment Guide 19 | 20 | 21 | -------------------------------------------------------------------------------- /.images/documentation.svg: -------------------------------------------------------------------------------- 1 | 2 | Documentation 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Documentation 19 | 20 | 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. 
Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Contribution to different modules 48 | Please follow specific guides for contribution to specific elements of the CID framework 49 | * [data-collection](/data-collection/CONTRIBUTING.md) 50 | 51 | ## Code of Conduct 52 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 53 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 54 | opensource-codeofconduct@amazon.com with any additional questions or comments. 55 | 56 | 57 | ## Security issue notifications 58 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 59 | 60 | 61 | ## Licensing 62 | 63 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Cloud Intelligence Dashboards - Data Collection 2 | 3 | ## Table of Contents 4 | 1. [Overview](#Overview) 5 | 1. [Architecture of Data Exports](#Architecture-of-Data-Exports) 6 | 1. [Architecture of Data Collection](#Architecture-of-Data-Collection) 7 | 1. [Cost](#Cost) 8 | 1. [Prerequisites](#Prerequisites) 9 | 1. [Regions](#Regions) 10 | 1. [Deployment and Cleanup Steps](#Deployment-and-Cleanup-Steps) 11 | 1. [Changelogs](#Changelogs) 12 | 1. [Feedback](#Feedback) 13 | 1. [Security](#Security) 14 | 1. [License](#License) 15 | 1. [Notices](#Notices) 16 | 17 | ## Overview 18 | This repository is a part of [Cloud Intelligence Dashboards](https://catalog.workshops.aws/awscid), a project that provides AWS customers with a series of in-depth and customizable dashboards for the most comprehensive cost and usage details to help optimize cost, track usage goals, and achieve operational excellence. 19 | 20 | This repository contains following elements: 21 | * [data-exports](/data-exports) - a Cloud Formation Templates for AWS Data Exports, such as Cost and Usage Report 2.0 and others. This allows a replication of Exports from your Management Account(s) to a Dedicated Data Collection Accounts as well as aggregation of multiple Exports from a set of Linked Accounts. 
22 | * [data-collection](/data-collection) - a set of Cloud Formation Templates for collecting infrastructure operational data from Management and Linked Accounts. Such as data from AWS Trusted Advisor, AWS Compute Optimizer, Inventories, Pricing, AWS Health, AWS Support Cases etc. See more about types of data collected [here](/data-collection). 23 | * [case-summarization](/case-summarization) - an additional Cloud Formation Template for deploying the AWS Support Case Summarization plugin that offers the capability to summarize cases through Generative AI powered by Amazon Bedrock. 24 | * [rls](/rls) - a stack for managing Row Level Security for CID Dashboards. 25 | * [security-hub](/security-hub) - Collection of data from AWS Security Hub. 26 | 27 | All Data Collections can be used independently from Dashboards. Typically data collections store data on [Amazon S3 Bucket](https://aws.amazon.com/s3/) and provide [AWS Glue](https://aws.amazon.com/glue/) tables and [Amazon Athena](https://aws.amazon.com/athena/) Views to explore and use these data. 28 | 29 | ### Other AWS Services 30 | * [Collection of AWS Config data](https://github.com/aws-samples/config-resource-compliance-dashboard) 31 | 32 | ### Multi-cloud data 33 | * [Collection of Azure Cost Data](https://github.com/aws-samples/aws-data-pipelines-for-azure-storage/) 34 | * [Collection of GCP Cost Data](https://github.com/awslabs/cid-gcp-cost-dashboard/) 35 | * [Collection of OCI Cost Data](https://github.com/awslabs/cid-oci-cost-dashboard/) 36 | 37 | ## Architecture of Data Exports 38 | ![Architecture of Data Exports](.images/architecture-data-exports.png "Architecture of Data Exports") 39 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) delivers daily the Cost & Usage Report (CUR2) to an [Amazon S3 Bucket](https://aws.amazon.com/s3/) in the Management Account. 40 | 2. [Amazon S3](https://aws.amazon.com/s3/) replication rule copies Export data to a dedicated Data Collection Account S3 bucket automatically. 41 | 3. [Amazon Athena](https://aws.amazon.com/athena/) allows querying data directly from the S3 bucket using an [AWS Glue](https://aws.amazon.com/glue/) table schema definition. 42 | 4. [Amazon QuickSight](https://aws.amazon.com/quicksight/) datasets can read from [Amazon Athena](https://aws.amazon.com/athena/). Check Cloud Intelligence Dashboards. 43 | 44 | See more in [data-exports](/data-exports). 45 | 46 | 47 | 48 | ## Architecture of Data Collection 49 | ![Architecture of Advanced Data Collection](.images/architecture-data-collection.png "Architecture of Advanced Data Collection") 50 | 1. The Advanced Data Collection can be deployed to enable advanced dashboards based on [AWS Trusted Advisor](https://aws.amazon.com/trustedadvisor/), [AWS Health Events](https://docs.aws.amazon.com/health/latest/ug/getting-started-phd.html) and other sources. Additional data is retrieved from [AWS Organization](https://aws.amazon.com/organizations/) or Linked Accounts. In this case [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rule triggers an [AWS Step Functions](https://aws.amazon.com/step-functions/) for data collection modules on a configurable schedule. 51 | 52 | 2. The "Account Collector" [AWS Lambda](https://aws.amazon.com/lambda/) in AWS Step Functions retrieves linked account details using [AWS Organizations API](https://docs.aws.amazon.com/organizations/latest/APIReference/Welcome.html). 53 | 54 | 3. 
The "Data Collection" Lambda function in AWS Step Functions assumes role in each linked account to retrieve account-specific data via [AWS SDK](https://aws.amazon.com/sdk-for-python/). 55 | 56 | 4. Retrieved data is stored in a centralized [Amazon S3 Bucket](https://aws.amazon.com/s3/). 57 | 58 | 5. Advanced Cloud Intelligence Dashboards leverage [Amazon Athena](https://aws.amazon.com/athena/) and [Amazon QuickSight](https://aws.amazon.com/quicksight/) for comprehensive data analysis. 59 | 60 | See more details in [data-collection](/data-collection). 61 | 62 | 63 | ## Cost 64 | The following table provides a sample cost breakdown for deploying of Foundational Dashboards with the default parameters in the US East (N. Virginia) Region for one month. 65 | 66 | | AWS Service | Dimensions | Monthly Cost [USD] | 67 | |---------------------------------|-------------------------------|--------------------| 68 | | S3 | Monthly storage | $5-10* | 69 | | AWS Lambda | On the schedule 1/14 days | $<3* | 70 | | AWS Step Functions | On the schedule 1/14 days | $<3* | 71 | | AWS Glue Crawler | On schedule | $<3* | 72 | | AWS Athena | Data scanned monthly | $15* | 73 | | **Total Estimated Monthly Cost** | | **<$50** | 74 | 75 | \* Costs are relative to the size of collected data (number of workloads, modules activated, AWS Accounts, Regions etc) and configured data collection frequency. 76 | 77 | Pleas use AWS Pricing Calculator for precise estimation. 78 | 79 | ## Prerequisites 80 | You need access to AWS Accounts. We recommend deployment of the Data Collection in a dedicated Data Collection Account, other than your Management (Payer) Account. You can use it to aggregate data from multiple Management (Payer) Accounts or multiple Linked Accounts. 81 | 82 | If you do not have access to the Management/Payer Account, you can still collect some types fo data across multiple Linked accounts. 83 | 84 | ## Regions 85 | Make sure you are installing data collection in the same region where you are going to use the data to avoid cross region charges. 86 | 87 | | Region Name | Region Code | Available | 88 | |:------------ | :-------------| :-------------| 89 | | Africa (Cape Town) | af-south-1 | | 90 | | Asia Pacific (Tokyo) | ap-northeast-1 | :heavy_check_mark: | 91 | | Asia Pacific (Seoul) | ap-northeast-2 | :heavy_check_mark: | 92 | | Asia Pacific (Mumbai) | ap-south-1 | :heavy_check_mark: | 93 | | Asia Pacific (Singapore) | ap-southeast-1 | :heavy_check_mark: | 94 | | Asia Pacific (Sydney) | ap-southeast-2 | :heavy_check_mark: | 95 | | Asia Pacific (Jakarta) | ap-southeast-3 | | 96 | | Canada (Central) | ca-central-1 | :heavy_check_mark: | 97 | | China (Beijing) | cn-north-1 | | 98 | | Europe (Frankfurt) | eu-central-1 | :heavy_check_mark: | 99 | | Europe (Zurich) | eu-central-2 | | 100 | | Europe (Stockholm) | eu-north-1 | :heavy_check_mark: | 101 | | Europe (Milan) | eu-south-1 | | 102 | | Europe (Spain) | eu-south-2 | | 103 | | Europe (Ireland) | eu-west-1 | :heavy_check_mark: | 104 | | Europe (London) | eu-west-2 | :heavy_check_mark: | 105 | | Europe (Paris) | eu-west-3 | :heavy_check_mark: | 106 | | South America (São Paulo) | sa-east-1 | :heavy_check_mark: | 107 | | US East (N. 
Virginia) | us-east-1 | :heavy_check_mark: | 108 | | US East (Ohio) | us-east-2 | :heavy_check_mark: | 109 | | AWS GovCloud (US-East) | us-gov-east-1 | | 110 | | AWS GovCloud (US-West) | us-gov-west-1 | | 111 | | US West (Oregon) | us-west-2 | :heavy_check_mark: | 112 | 113 | 114 | ## Deployment and Cleanup Steps 115 | Reference to folders. 116 | * [data-exports](/data-exports) 117 | * [data-collection](/data-collection) 118 | * [case-summarization](/case-summarization) 119 | * [rls](/rls) 120 | * [security-hub](/security-hub) 121 | 122 | ## Changelogs 123 | Check [Releases](/../../releases) 124 | 125 | ## Feedback 126 | Please reference to [this page](https://catalog.workshops.aws/awscid/en-US/feedback-support) 127 | 128 | ## Contribution 129 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 130 | 131 | ## Security 132 | When you build systems on AWS infrastructure, security responsibilities are shared between you and AWS. This [shared responsibility 133 | model](https://aws.amazon.com/compliance/shared-responsibility-model/) reduces your operational burden because AWS operates, manages, and 134 | controls the components including the host operating system, the virtualization layer, and the physical security of the facilities in 135 | which the services operate. For more information about AWS security, visit [AWS Cloud Security](http://aws.amazon.com/security/). 136 | 137 | See [SECURITY](SECURITY.md) for more information. 138 | 139 | ## License 140 | This project is licensed under the Apache-2.0 License. See the [LICENSE](LICENSE) file. 141 | 142 | ## Notices 143 | Dashboards and their content: (a) are for informational purposes only, (b) represents current AWS product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from AWS and its affiliates, suppliers or licensors. AWS content, products or services are provided “as is” without warranties, representations, or conditions of any kind, whether express or implied. The responsibilities and liabilities of AWS to its customers are controlled by AWS agreements, and this document is not part of, nor does it modify, any agreement between AWS and its customers. 144 | 145 | 146 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | We take all security reports seriously. 4 | When we receive such reports, 5 | we will investigate and subsequently address 6 | any potential vulnerabilities as quickly as possible. 7 | If you discover a potential security issue in this project, 8 | please notify AWS/Amazon Security via our 9 | [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) 10 | or directly via email to [AWS Security](mailto:aws-security@amazon.com) and [CID Team](mailto:cloud-intelligence-dashboards@amazon.com). 11 | Please do *not* create a public GitHub issue in this project. 12 | 13 | Also please check Security FAQ [here](https://catalog.workshops.aws/awscid/en-US/faqs#security). 
14 | -------------------------------------------------------------------------------- /case-summarization/GUARDRAIL.md: -------------------------------------------------------------------------------- 1 | ## AWS Bedrock Guardrail 2 | Amazon Bedrock Guardrail is a crucial security feature for generative AI applications that helps implement safeguards based on specific use cases and responsible AI policies. It provides an additional layer of protection on top of the native safeguards offered by foundation models (FMs)[1][2]. 3 | 4 | ## Key Features and Importance 5 | 6 | Amazon Bedrock Guardrails offers several important security features: 7 | 8 | 1. **Content Filtering**: It helps block harmful content by evaluating both user inputs and model responses. The system can filter out content related to hate speech, insults, sexual content, violence, and misconduct[2]. 9 | 10 | 2. **Topic Restrictions**: Organizations can define specific topics to avoid, ensuring that interactions remain relevant to their business and align with company policies[2]. 11 | 12 | 3. **Sensitive Information Protection**: The system can detect and redact personally identifiable information (PII) in user inputs and model responses, helping to protect user privacy[2][3]. 13 | 14 | 4. **Custom Word Filtering**: It allows the configuration of custom words or phrases to be blocked, including profanity or specific terms like competitor names[2]. 15 | 16 | 5. **Hallucination Detection**: Contextual grounding checks help detect and filter out hallucinations in model responses, ensuring more accurate and trustworthy information[2]. 17 | 18 | ## Security Importance 19 | 20 | The importance of Amazon Bedrock Guardrails for security cannot be overstated: 21 | 22 | 1. **Enhanced Content Safety**: It can block up to 85% more harmful content compared to native FM protections, significantly improving the safety of AI applications[2]. 23 | 24 | 2. **Consistent Security Across Models**: Guardrails work with all large language models in Amazon Bedrock, providing a uniform level of security regardless of the underlying model[2]. 25 | 26 | 3. **Customizable Safeguards**: Organizations can create multiple guardrails with different configurations, tailoring security measures to specific applications and use cases[1][3]. 27 | 28 | 4. **Compliance and Responsible AI**: By allowing fine-tuned control over content and interactions, Guardrails help organizations adhere to their responsible AI policies and maintain regulatory compliance[2]. 29 | 30 | 5. **Protection Against Prompt Attacks**: The system safeguards against prompt injection and jailbreak attempts, enhancing overall security[2]. 31 | 32 | Amazon Bedrock Guardrails plays a vital role in ensuring that generative AI applications remain safe, relevant, and aligned with organizational policies. By providing robust, customizable security features, it enables businesses to leverage the power of AI while mitigating potential risks associated with harmful or inappropriate content[1][2][3]. 33 | 34 | ### Reasonable Defaults 35 | 36 | This plugin comes with the following reasonable defaults that can be overriden through the parameters exposed by the CloudFormation template: 37 | 38 | | Parameter | Description | Default | 39 | | --- | --- | --- | 40 | | BlockedInputMessage | Message to return when the Amazon Bedrock Guardrail blocks a prompt. 
| {"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""} | 41 | | BlockedOutputMessage | Message to return when the Amazon Bedrock Guardrail blocks a model response | '' | 42 | | IncludeSexualContentFilter | Whether to include Sexual Content Filter in the Guardrail or not | 'yes' | 43 | | SexualContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 44 | | SexualContentFilterOutputStrength | The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 45 | | IncludeViolentContentFilter | Whether to include Violent Content Filter in the Guardrail or not | 'yes' | 46 | | ViolentContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 47 | | ViolentContentFilterOutputStrength | The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 48 | | IncludeHateContentFilter | Whether to include Violent Content Filter in the Guardrail or not | 'yes' | 49 | | HateContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 50 | | HateContentFilterOutputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 51 | | IncludeInsultsContentFilter | Whether to include Insults Content Filter in the Guardrail or not | 'yes' | 52 | | InsultsContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 53 | | InsultsContentFilterOutputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 54 | | IncludeMisconductContentFilter | Whether to include Insults Content Filter in the Guardrail or not | 'yes' | 55 | | MisconductContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces | 'HIGH' | 56 | | MisconductContentFilterOutputStrength | The strength of the content filter to apply to prompts. 
As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 57 | | IncludePromptAttackContentFilter | Whether to include the Prompt Attack Content Filter in the Guardrail or not | 'yes' | 58 | | PromptAttackContentFilterInputStrength | The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces. | 'HIGH' | 59 | 60 | ### References & Further reading 61 | 62 | * [1] How Amazon Bedrock Guardrails works https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-how.html 63 | * [2] Generative AI Data Governance - Amazon Bedrock Guardrails - AWS https://aws.amazon.com/bedrock/guardrails/ 64 | * [3] Stop harmful content in models using Amazon Bedrock Guardrails https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html 65 | 66 | ## Usage 67 | 68 | This stack deploys a minimalistic Amazon Bedrock Guardrail that filters out any inputs or outputs that can be classified as prompt attacks, sexual content, violence, misconduct, hate speech, or insults. Any additional fine-tuning of the filters can be achieved by customizing this template. 69 | 70 | ## Support and Contribution 71 | 72 | See [CONTRIBUTING](../CONTRIBUTING.md) for more information. 73 | 74 | ## Security 75 | 76 | See [SECURITY](../SECURITY.md) for more information. 77 | 78 | ## License 79 | 80 | This project is licensed under the Apache-2.0 License. 81 | 82 | -------------------------------------------------------------------------------- /case-summarization/README.md: -------------------------------------------------------------------------------- 1 | ## AWS Support Case Summarization Plugin 2 | 3 | ### About 4 | 5 | This plugin augments the experience of the AWS Support Cases Radar, which is part of the [Cloud Intelligence Dashboards Framework](https://catalog.workshops.aws/awscid), by leveraging Generative AI powered by Amazon Bedrock to summarize AWS Support Case communications and help customers achieve operational excellence. 6 | 7 | This plugin contains the following elements: 8 | * [case-summarization](README.md) - a CloudFormation Template for deploying the AWS Support Case Summarization Plugin that integrates seamlessly with the Data Collection Framework.
9 | 10 | ### Architecture 11 | 12 | ![Architecture](images/archi.png) 13 | 14 | ### Reasonable Defaults 15 | 16 | This plugin comes with the following reasonable defaults that can be overridden through the parameters exposed by the CloudFormation template: 17 | 18 | | Parameter | Description | Default | 19 | | --- | --- | --- | 20 | | BedrockRegion | The AWS Region from which the Summarization is performed | us-east-1 | 21 | | Instructions | Additional instructions passed to the Large Language Model to customize the summarization process | '' | 22 | | Provider | Large Language Model Provider for the summarization process | Anthropic | 23 | | FoundationModel | Foundation Model to be used for the summarization process | Claude 3.5 Sonnet | 24 | | InferenceType | Summarization process Inference Type | 'ON_DEMAND' | 25 | | Temperature | Summarization process Temperature | 0 | 26 | | MaxTokens | Summarization process Maximum Tokens | 8096 | 27 | | MaxRetries | Summarization process Maximum Retries | 30 | 28 | | Timeout | Summarization process Timeout in seconds | 60 | 29 | | BatchSize | Summarization process Batch Size for parallel processing | 1 | 30 | 31 | ### Installation 32 | 33 | #### 1. Enable Amazon Bedrock Target Model Access In the Data Collection Account 34 | 35 | - See [Add or remove access to Amazon Bedrock foundation models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) for guidance. 36 | 37 | #### 2. Deploy the AWS Support Case Summarization Stack In the Data Collection Account 38 | 39 | *
 [Launch Stack >>](https://console.aws.amazon.com/cloudformation/home#/stacks/create/review?&templateURL=https://aws-managed-cost-intelligence-dashboards.s3.amazonaws.com/cfn/case-summarization/case-summarization.yaml&stackName=CidSupportCaseSummarizationStack)  
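If you prefer to deploy from the command line instead of the console button, the same template can be launched with the AWS CLI. The sketch below is illustrative only: the template URL and stack name are taken from the Launch Stack link above, while the `--capabilities` flag is an assumption (the stack may create IAM resources), so adjust it to whatever the template actually requires.

```bash
# Sketch: deploy the case-summarization stack with the AWS CLI.
# Template URL and stack name come from the Launch Stack link above;
# the IAM capability flags are an assumption - remove or change them if the
# template does not create (named) IAM resources.
aws cloudformation create-stack \
  --stack-name CidSupportCaseSummarizationStack \
  --template-url https://aws-managed-cost-intelligence-dashboards.s3.amazonaws.com/cfn/case-summarization/case-summarization.yaml \
  --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM

# Optionally wait until stack creation completes
aws cloudformation wait stack-create-complete \
  --stack-name CidSupportCaseSummarizationStack
```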
40 | 41 | 42 | ## Guardrail 43 | 44 | See [GUARDRAIL](GUARDRAIL.md) for more information. 45 | 46 | 47 | ## Support and Contribution 48 | 49 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 50 | 51 | ## Security 52 | 53 | See [SECURITY](SECURITY.md) for more information. 54 | 55 | ## Limitations 56 | 57 | As of today, the AWS Support Cases Summarization plugin does not make use of Amazon Bedrock Guardrails. See [issue](https://github.com/run-llama/llama_index/issues/17217). 58 | 59 | ## License 60 | 61 | This project is licensed under the Apache-2.0 License. 62 | -------------------------------------------------------------------------------- /case-summarization/deploy/guardrail.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: CID AWS Bedrock Guardrail Template Stack v0.0.1 3 | Metadata: 4 | AWS::CloudFormation::Interface: 5 | ParameterGroups: 6 | - Label: 7 | default: 'Amazon Bedrock Guardrail parameters' 8 | Parameters: 9 | - BlockedInputMessage 10 | - BlockedOutputMessage 11 | - IncludeSexualContentFilter 12 | - SexualContentFilterInputStrength 13 | - SexualContentFilterOutputStrength 14 | - IncludeViolentContentFilter 15 | - ViolentContentFilterInputStrength 16 | - ViolentContentFilterOutputStrength 17 | - IncludeHateContentFilter 18 | - HateContentFilterInputStrength 19 | - HateContentFilterOutputStrength 20 | - IncludeInsultsContentFilter 21 | - InsultsContentFilterInputStrength 22 | - InsultsContentFilterOutputStrength 23 | - IncludeMisconductContentFilter 24 | - MisconductContentFilterInputStrength 25 | - MisconductContentFilterOutputStrength 26 | - IncludePromptAttackContentFilter 27 | - Label: 28 | default: 'Technical parameters' 29 | Parameters: 30 | - CFDataName 31 | - ResourcePrefix 32 | 33 | Parameters: 34 | CFDataName: 35 | Type: String 36 | Description: The name of what this cf is doing. 37 | Default: bedrock-guardrail 38 | ResourcePrefix: 39 | Type: String 40 | Description: This prefix will be placed in front of all resources created. Note you may wish to add a dash at the end to make more readable (e.g. 'prefix-'). This parameter CANNOT BE UPDATED. Delete and re-create stack if needed an update. 41 | Default: "CID-DC-" 42 | BlockedInputMessage: 43 | Type: String 44 | Description: Message to return when the Amazon Bedrock Guardrail blocks a prompt. 45 | MaxLength: 500 46 | Default: '{"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""}' 47 | BlockedOutputMessage: 48 | Type: String 49 | Description: Message to return when the Amazon Bedrock Guardrail blocks a model response. 50 | MaxLength: 500 51 | Default: '{"executive_summary":"Amazon Bedrock Guardrails has blocked the AWS Support Case Summarization.","proposed_solutions":"","actions":"","references":[],"tam_involved":"","feedback":""}' 52 | IncludeSexualContentFilter: 53 | Type: String 54 | Description: "Whether to include Sexual Content Filter in the Guardrail or not" 55 | AllowedValues: ['yes', 'no'] 56 | Default: 'yes' 57 | SexualContentFilterInputStrength: 58 | Type: String 59 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 
60 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 61 | Default: 'HIGH' 62 | SexualContentFilterOutputStrength: 63 | Type: String 64 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 65 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 66 | Default: 'HIGH' 67 | IncludeViolentContentFilter: 68 | Type: String 69 | Description: "Whether to include Violent Content Filter in the Guardrail or not" 70 | AllowedValues: ['yes', 'no'] 71 | Default: 'yes' 72 | ViolentContentFilterInputStrength: 73 | Type: String 74 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 75 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 76 | Default: 'HIGH' 77 | ViolentContentFilterOutputStrength: 78 | Type: String 79 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 80 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 81 | Default: 'HIGH' 82 | IncludeHateContentFilter: 83 | Type: String 84 | Description: "Whether to include Violent Content Filter in the Guardrail or not" 85 | AllowedValues: ['yes', 'no'] 86 | Default: 'yes' 87 | HateContentFilterInputStrength: 88 | Type: String 89 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 90 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 91 | Default: 'HIGH' 92 | HateContentFilterOutputStrength: 93 | Type: String 94 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 95 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 96 | Default: 'HIGH' 97 | IncludeInsultsContentFilter: 98 | Type: String 99 | Description: "Whether to include Insults Content Filter in the Guardrail or not" 100 | AllowedValues: ['yes', 'no'] 101 | Default: 'yes' 102 | InsultsContentFilterInputStrength: 103 | Type: String 104 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 105 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 106 | Default: 'HIGH' 107 | InsultsContentFilterOutputStrength: 108 | Type: String 109 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 
110 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 111 | Default: 'HIGH' 112 | IncludeMisconductContentFilter: 113 | Type: String 114 | Description: "Whether to include Misconduct Content Filter in the Guardrail or not" 115 | AllowedValues: ['yes', 'no'] 116 | Default: 'yes' 117 | MisconductContentFilterInputStrength: 118 | Type: String 119 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 120 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 121 | Default: 'HIGH' 122 | MisconductContentFilterOutputStrength: 123 | Type: String 124 | Description: "The strength of the content filter to apply to model responses. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 125 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 126 | Default: 'HIGH' 127 | IncludePromptAttackContentFilter: 128 | Type: String 129 | Description: "Whether to include Prompt Attack Content Filter in the Guardrail or not" 130 | AllowedValues: ['yes', 'no'] 131 | Default: 'yes' 132 | PromptAttackContentFilterInputStrength: 133 | Type: String 134 | Description: "The strength of the content filter to apply to prompts. As you increase the filter strength, the likelihood of filtering harmful content increases and the probability of seeing harmful content in your application reduces." 135 | AllowedValues: ['NONE', 'LOW', 'MEDIUM', 'HIGH'] 136 | Default: 'HIGH' 137 | 138 | Conditions: 139 | DeploySexualContentFilter: !Equals [ !Ref IncludeSexualContentFilter, "yes"] 140 | DeployViolentContentFilter: !Equals [ !Ref IncludeViolentContentFilter, "yes"] 141 | DeployHateContentFilter: !Equals [ !Ref IncludeHateContentFilter, "yes"] 142 | DeployInsultsContentFilter: !Equals [ !Ref IncludeInsultsContentFilter, "yes"] 143 | DeployMisconductContentFilter: !Equals [ !Ref IncludeMisconductContentFilter, "yes"] 144 | DeployPromptAttackContentFilter: !Equals [ !Ref IncludePromptAttackContentFilter, "yes"] 145 | 146 | Resources: 147 | BedrockGuardrail: 148 | Type: AWS::Bedrock::Guardrail 149 | Properties: 150 | Name: !Sub "${ResourcePrefix}${CFDataName}" 151 | Description: Amazon Bedrock Guardrail 152 | BlockedInputMessaging: !Ref BlockedInputMessage 153 | BlockedOutputsMessaging: !Ref BlockedOutputMessage 154 | ContentPolicyConfig: 155 | FiltersConfig: 156 | - !If 157 | - DeploySexualContentFilter 158 | - InputStrength: !Ref SexualContentFilterInputStrength 159 | OutputStrength: !Ref SexualContentFilterOutputStrength 160 | Type: SEXUAL 161 | - !Ref AWS::NoValue 162 | - !If 163 | - DeployViolentContentFilter 164 | - InputStrength: !Ref ViolentContentFilterInputStrength 165 | OutputStrength: !Ref ViolentContentFilterOutputStrength 166 | Type: VIOLENCE 167 | - !Ref AWS::NoValue 168 | - !If 169 | - DeployHateContentFilter 170 | - InputStrength: !Ref HateContentFilterInputStrength 171 | OutputStrength: !Ref HateContentFilterOutputStrength 172 | Type: HATE 173 | - !Ref AWS::NoValue 174 | - !If 175 | - DeployInsultsContentFilter 176 | - InputStrength: !Ref InsultsContentFilterInputStrength 177 | OutputStrength: !Ref InsultsContentFilterOutputStrength 178 | Type: INSULTS 179 | - !Ref AWS::NoValue 180 | - !If 181 | - DeployMisconductContentFilter 182 | - InputStrength: !Ref MisconductContentFilterInputStrength 183 | OutputStrength: !Ref 
MisconductContentFilterOutputStrength 184 | Type: MISCONDUCT 185 | - !Ref AWS::NoValue 186 | - !If 187 | - DeployPromptAttackContentFilter 188 | - InputStrength: !Ref PromptAttackContentFilterInputStrength 189 | OutputStrength: 'NONE' 190 | Type: PROMPT_ATTACK 191 | - !Ref AWS::NoValue 192 | WordPolicyConfig: 193 | ManagedWordListsConfig: 194 | - Type: PROFANITY 195 | Metadata: 196 | cfn-lint: 197 | config: 198 | ignore_checks: 199 | - E3032 # Guardrail FiltersConfig Variabilization False Positive 200 | 201 | BedrockGuardrailVersion: 202 | Type: AWS::Bedrock::GuardrailVersion 203 | Properties: 204 | Description: Amazon Bedrock Guardrail 205 | GuardrailIdentifier: !Ref BedrockGuardrail 206 | -------------------------------------------------------------------------------- /case-summarization/images/archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/cloud-intelligence-dashboards-data-collection/537795920750287e90220f97bf7c0d2ba6025882/case-summarization/images/archi.png -------------------------------------------------------------------------------- /case-summarization/layer/build-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script builds the Lambda Layer that contains Pydantic & Llama_index 3 | 4 | git_root=$(git rev-parse --show-toplevel) 5 | # shellcheck disable=SC2155 disable=SC2002 6 | export version=$(cat "${git_root}/case-summarization/utils/version.json" | jq .version --raw-output) 7 | export prefix='llm' 8 | cd "$(dirname "$0")" || exit 9 | 10 | function build_layer { 11 | echo 'Building a layer' 12 | rm -rf ./python 13 | mkdir -p ./python 14 | python3 -m pip install --only-binary=:all: --platform=manylinux2014_x86_64 --target=./python --requirement=./requirements.txt 15 | du -sh ./python # must be less then 256M 16 | rm -rf "$prefix-$version.zip" 17 | zip -qr "$prefix-$version.zip" ./python 18 | ls -h -l "$prefix-$version.zip" 19 | rm -rf ./python 20 | } 21 | 22 | build_layer 1>&2 23 | 24 | ls "$prefix-$version.zip" -------------------------------------------------------------------------------- /case-summarization/layer/publish-lambda-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script can be used for release or testing of lambda layers upload. 3 | 4 | # First build layer 5 | git_root=$(git rev-parse --show-toplevel) 6 | cd "${git_root}/case-summarization/layer/" || exit 7 | layer=$(./build-layer.sh) 8 | 9 | # Then publish on s3 10 | export AWS_REGION=us-east-1 11 | export STACK_SET_NAME=LayerBuckets 12 | aws cloudformation list-stack-instances \ 13 | --stack-set-name $STACK_SET_NAME \ 14 | --query 'Summaries[].[StackId,Region]' \ 15 | --output text | 16 | while read -r stack_id region; do 17 | echo "uploading $layer to $region" 18 | # shellcheck disable=SC2016 19 | bucket=$(aws cloudformation list-stack-resources --stack-name "$stack_id" \ 20 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 21 | --region "$region" --output text) 22 | # shellcheck disable=SC2181 23 | output=$(aws s3api put-object \ 24 | --bucket "$bucket" \ 25 | --key "cid-llm-lambda-layer/$layer" \ 26 | --body "./$layer") 27 | # shellcheck disable=SC2181 disable=SC2002 28 | if [ $? 
-ne 0 ]; then 29 | echo "Error: $output" 30 | else 31 | echo "Uploaded successfuly" 32 | fi 33 | done 34 | 35 | echo 'Cleanup' 36 | rm -vf "./$layer" 37 | 38 | echo 'Done' -------------------------------------------------------------------------------- /case-summarization/layer/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index-llms-bedrock>=0.3.3 2 | pydantic-core 3 | -------------------------------------------------------------------------------- /case-summarization/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import git 4 | import json 5 | 6 | repo = git.Repo('.') 7 | 8 | print(repo.git.execute('git checkout main'.split())) 9 | print(repo.git.execute('git pull'.split())) 10 | 11 | 12 | old_ver = json.load(open("data-collection/utils/version.json"))['version'] 13 | 14 | print (old_ver) 15 | bump='patch' 16 | if len(sys.argv)>1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump=='patch': 22 | new_ver = '.'.join(map(str,[maj, minor, patch + 1])) 23 | elif bump=='minor': 24 | new_ver = '.'.join(map(str,[maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = open("data-collection/utils/version.json").read() 32 | with open("data-collection/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver,new_ver)) 34 | 35 | 36 | filenames = [ 37 | 'data-collection/deploy/deploy-data-read-permissions.yaml', 38 | 'data-collection/deploy/deploy-data-collection.yaml', 39 | 'data-collection/deploy/deploy-in-management-account.yaml', 40 | 'data-collection/deploy/deploy-in-linked-account.yaml', 41 | "data-collection/utils/version.json", 42 | ] 43 | for filename in filenames: 44 | tx = open(filename).read() 45 | with open(filename, "w") as f: 46 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 47 | 48 | 49 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 50 | 51 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 52 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 53 | -------------------------------------------------------------------------------- /case-summarization/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 6 | 7 | code_path=$(git rev-parse --show-toplevel)/case-summarization/deploy 8 | 9 | echo "sync to central bucket" 10 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/case-summarization/ -------------------------------------------------------------------------------- /case-summarization/utils/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script uploads CloudFormation files to S3 bucket. Can be used with any testing bucket or prod. 4 | # see also README.md 5 | 6 | if [ -n "$1" ]; then 7 | bucket=$1 8 | else 9 | echo "ERROR: First parameter not supplied. Provide a bucket name. 
aws-well-architected-labs for prod aws-wa-labs-staging for stage " 10 | echo " prod aws-well-architected-labs " 11 | exit 1 12 | fi 13 | code_path=$(git rev-parse --show-toplevel)/case-summarization/deploy 14 | 15 | echo "Sync to $bucket" 16 | aws s3 sync $code_path/ s3://$bucket/cfn/case-summarization/ 17 | echo 'Done' 18 | -------------------------------------------------------------------------------- /case-summarization/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1" 3 | } -------------------------------------------------------------------------------- /data-collection/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | Starting from release v3.0.4, all release notes are captured in https://github.com/awslabs/cid-framework/releases 4 | 5 | ## v3.0.2 6 | Bug fixes 7 | 8 | ## v3.0.1 9 | Improved performance of Data Collection for Cost Optimization Hub 10 | 11 | ## v3.0.0 12 | Data Collection went through a major refactoring and improvement. The data collection mechanism now leverages Step Functions, and collection was added for new services such as AWS Cost Optimization Hub and AWS Backup 13 | 14 | ## v2.1 15 | Added a new service: AWS Cost Anomaly Detection 16 | 17 | ## v2 18 | Data Collection was updated to collect data from multiple AWS Organizations 19 | 20 | ## v1 21 | Data Collection lab was published on the Well-Architected Labs site 22 | -------------------------------------------------------------------------------- /data-collection/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTION GUIDE 2 | 3 | # Development process 4 | 5 | ## Testing environment 6 | You can test this lab in a dedicated account that preferably has the following assets: 7 | * EC2 instances running for more than 14 days (for Compute Optimizer and CE Rightsizing) 8 | * At least one EBS volume and one volume snapshot 9 | * At least one custom AMI created from one of the snapshots 10 | * Activated Enterprise Support (for the TA module) 11 | * An RDS cluster or single instance 12 | * An ECS cluster with one service deployed ([wordpress](https://aws.amazon.com/blogs/containers/running-wordpress-amazon-ecs-fargate-ecs/) will work fine) 13 | * A Transit Gateway with at least one attachment 14 | * An AWS Organization with trusted access enabled (see [Activate trusted access with AWS Organizations](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/stacksets-orgs-activate-trusted-access.html)) 15 | * An S3 bucket to store the CloudFormation templates that deploy the infrastructure for the optimization data collection components 16 | 17 | ## Prerequisites for local environment 18 | 19 | ### General 20 | 21 | * [cfn_nag_scan](https://github.com/stelligent/cfn_nag#installation) 22 | * python3.9+ 23 | * `pip3 install -U boto3 pytest cfn-flip pylint checkov` 24 | * Configured AWS credentials 25 | * Install and configure [git-secrets](https://github.com/awslabs/git-secrets#installing-git-secrets) 26 | 27 | ## Testing 28 | 29 | ### AWS access credentials 30 | 31 | For the purpose of testing, the Python and shell scripts make use of the default AWS credentials set up in your ~/.aws folder. 32 | 33 | Make sure you configure credentials for an organization's management account that has the necessary permissions to retrieve information from itself and from the member accounts. 
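A minimal sanity check (a sketch, not part of the repository scripts; it assumes the default profile is the one you will use for testing) is to confirm that the configured credentials resolve to the management account and can enumerate the member accounts:

```bash
# Both commands are standard AWS CLI calls that only read identity and
# organization metadata, so they are safe to run against any account.
aws sts get-caller-identity --query "Account" --output text        # should print the management account ID
aws organizations list-accounts --query "Accounts[].Id" --output text  # fails if the credentials cannot read the organization
```

If the second command returns an AccessDenied error, the credentials most likely do not belong to (or cannot read from) the organization's management account.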
34 | 35 | `aws configure` can be used to set up the AWS credentials in your local environment. 36 | 37 | ### Steps 38 | 39 | 1. (One time) Clone the project locally and install dependencies 40 | 41 | ```bash 42 | git clone git@github.com:awslabs/cid-framework.git 43 | cd cid-framework 44 | pip3 install -U boto3 pytest cfn-flip pylint bandit cfn-lint checkov 45 | ``` 46 | 47 | Create a test bucket in the test account. You can use any bucket. 48 | 49 | ```bash 50 | export account_id=$(aws sts get-caller-identity --query "Account" --output text ) 51 | export bucket=cid-$account_id-test 52 | 53 | aws s3api create-bucket --bucket $bucket 54 | ``` 55 | 56 | 2. Check the quality of the code: 57 | 58 | CloudFormation: 59 | ```bash 60 | ./utils/lint.sh 61 | ``` 62 | 63 | Pylint: 64 | ```bash 65 | python3 ./utils/pylint.py 66 | ``` 67 | 68 | 69 | 3. Upload the code to a bucket and run integration tests in your testing environment 70 | 71 | ```bash 72 | ./test/run-test-from-scratch.sh --no-teardown 73 | ``` 74 | 75 | The test installs the stacks from scratch in a single account and then checks for the presence of the Athena tables. After running, it deletes the stacks and all artifacts that are not deleted by CloudFormation. You can skip the teardown by providing the `--no-teardown` flag. 76 | 77 | 4. Create a pull request. 78 | 79 | 80 | # Release process (CID Team only) 81 | All CloudFormation templates are uploaded to the `aws-managed-cost-intelligence-dashboards*` buckets. 82 | 83 | ```bash 84 | ./data-collection/utils/release.sh 85 | ``` 86 | -------------------------------------------------------------------------------- /data-collection/README.md: -------------------------------------------------------------------------------- 1 | ## CID Data Collection 2 | 3 | ### About 4 | 5 | This project demonstrates the usage of AWS APIs for collecting various types of usage data. 6 | 7 | For deployment and additional information, refer to the [documentation](https://catalog.workshops.aws/awscid/data-collection). 8 | 9 | [![Documentation](/.images/documentation.svg)](https://catalog.workshops.aws/awscid/data-collection) 10 | 11 | 12 | ### Architecture 13 | 14 | ![Architecture](/.images/architecture-data-collection-detailed.png) 15 | 16 | 1. An [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rule invokes [AWS Step Functions](https://aws.amazon.com/step-functions/) for every deployed data collection module on a schedule. 17 | 2. The Step Function launches an [AWS Lambda](https://aws.amazon.com/lambda/) function, **Account Collector**, that assumes the **Read Role** in the Management accounts and retrieves the list of linked accounts via the [AWS Organizations API](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_integrate_services.html). 18 | 3. Step Functions launches the **Data Collection Lambda** function for each collected account. 19 | 4. Each data collection module Lambda function assumes an [IAM](https://aws.amazon.com/iam/) role in the linked accounts and retrieves the respective optimization data via the [AWS SDK for Python (Boto3)](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html). Retrieved data is aggregated in an [Amazon S3](https://aws.amazon.com/s3/) bucket. 20 | 5. Once the data is stored in the S3 bucket, Step Functions triggers an [AWS Glue](https://aws.amazon.com/glue/) crawler, which creates or updates the table in the [AWS Glue Data Catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#data-catalog-intro). 21 | 6. 
Collected data is visualized with the [Cloud Intelligence Dashboards](https://aws.amazon.com/solutions/implementations/cloud-intelligence-dashboards/) using [Amazon QuickSight](https://aws.amazon.com/quicksight/) to get optimization recommendations and insights. 22 | 23 | 24 | ### Modules 25 | List of modules and objects collected: 26 | | Module Name | AWS Services | Collected In | Details | 27 | | --- | --- | --- | --- | 28 | | `organization` | [AWS Organizations](https://aws.amazon.com/organizations/) | Management Accounts | | 29 | | `budgets` | [AWS Budgets](https://aws.amazon.com/aws-cost-management/aws-budgets/) | Linked Accounts | | 30 | | `compute-optimizer` | [AWS Compute Optimizer](https://aws.amazon.com/compute-optimizer/) | Management Accounts | Requires [Enablement of Compute Optimizer](https://aws.amazon.com/compute-optimizer/getting-started/#:~:text=Opt%20in%20for%20Compute%20Optimizer,created%20automatically%20in%20your%20account.) | 31 | | `trusted-advisor` | [AWS Trusted Advisor](https://aws.amazon.com/premiumsupport/technology/trusted-advisor/) | Linked Accounts | Requires Business, Enterprise or On-Ramp Support Level | 32 | | `support-cases` | [AWS Support](https://aws.amazon.com/premiumsupport/) | Linked Accounts | Requires Business, Enterprise On-Ramp, or Enterprise Support plan | 33 | | `cost-explorer-cost-anomaly` | [AWS Cost Anomaly Detection](https://aws.amazon.com/aws-cost-management/aws-cost-anomaly-detection/) | Management Accounts | | 34 | | `cost-explorer-rightsizing` | [AWS Cost Explorer](https://aws.amazon.com/aws-cost-management/aws-cost-explorer/) | Management Accounts | DEPRECATED. Please use `Data Exports` for `Cost Optimization Hub` | 35 | | `inventory` | Various services | Linked Accounts | Collects `Amazon OpenSearch Domains`, `Amazon ElastiCache Clusters`, `RDS DB Instances`, `EBS Volumes`, `AMI`, `EC2 Instances`, `EBS Snapshot`, `RDS Snapshot`, `Lambda`, `RDS DB Clusters`, `EKS Clusters` | 36 | | `pricing` | Various services | Data Collection Account | Collects pricing for `Amazon RDS`, `Amazon EC2`, `Amazon ElastiCache`, `AWS Lambda`, `Amazon OpenSearch`, `AWS Compute Savings Plan` | 37 | | `rds-usage` | [Amazon RDS](https://aws.amazon.com/rds/) | Linked Accounts | Collects CloudWatch metrics for chargeback | 38 | | `transit-gateway` | [AWS Transit Gateway](https://aws.amazon.com/transit-gateway/) | Linked Accounts | Collects CloudWatch metrics for chargeback | 39 | | `ecs-chargeback` | [Amazon ECS](https://aws.amazon.com/ecs/) | Linked Accounts | | 40 | | `backup` | [AWS Backup](https://aws.amazon.com/backup/) | Management Accounts | Collects Backup Restore and Copy Jobs. Requires [activation of cross-account](https://docs.aws.amazon.com/aws-backup/latest/devguide/manage-cross-account.html#enable-cross-account) | 41 | | `health-events` | [AWS Health](https://aws.amazon.com/health/) | Management Accounts | Collect AWS Health notifications via AWS Organizational view | 42 | | `licence-manager` | [AWS License Manager](https://aws.amazon.com/license-manager/) | Management Accounts | Collect Licenses and Grants | 43 | | `aws-feeds` | N/A | Data Collection Account | Collects Blog posts and News Feeds | 44 | | `quicksight` | [Amazon QuickSight](https://aws.amazon.com/quicksight/) | Data Collection Account | Collects QuickSight User and Group information in the Data Collection Account only | 45 | 46 | 47 | ### Deployment Overview 48 | 49 | ![Deployment Architecture](/.images/architecture-data-collection-deploy.png) 50 | 51 | 1. 
Deploy the Advanced Data Collection Permissions CloudFormation stack to Management (Payer) AWS Account. The Permissions CloudFormation stack in the Management (Payer) Account also deploys Permissions stacks to each of Linked accounts via StackSets. 52 | 53 | 2. Deploy the Data Collection Stack to the Data Collection AWS Account 54 | 55 | 56 | For deployment and further information please reference to this [documentation](https://catalog.workshops.aws/awscid/data-collection). 57 | 58 | [![Documentation](/.images/documentation.svg)](https://catalog.workshops.aws/awscid/data-collection) 59 | 60 | 61 | ### Contributing 62 | See [CONTRIBUTING.md](CONTRIBUTING.md) 63 | -------------------------------------------------------------------------------- /data-collection/deploy/data/rds_graviton_mapping.csv: -------------------------------------------------------------------------------- 1 | Aurora,PostgreSQL,db.t2.medium,db.t4g.medium 2 | Aurora,PostgreSQL,db.t3.medium,None 3 | Aurora,PostgreSQL,db.t3.large,db.t4g.large 4 | Aurora,PostgreSQL,db.r4.large,db.r6g.large 5 | Aurora,PostgreSQL,db.r4.xlarge,db.r6g.xlarge 6 | Aurora,PostgreSQL,db.r4.2xlarge,db.r6g.2xlarge 7 | Aurora,PostgreSQL,db.r4.4xlarge,db.r6g.4xlarge 8 | Aurora,PostgreSQL,db.r4.8xlarge,db.r6g.8xlarge 9 | Aurora,PostgreSQL,db.r4.16xlarge,db.r6g.16xlarge 10 | Aurora,PostgreSQL,db.r5.large,db.r6g.large 11 | Aurora,PostgreSQL,db.r5.xlarge,db.r6g.xlarge 12 | Aurora,PostgreSQL,db.r5.2xlarge,db.r6g.2xlarge 13 | Aurora,PostgreSQL,db.r5.4xlarge,db.r6g.4xlarge 14 | Aurora,PostgreSQL,db.r5.8xlarge,db.r6g.8xlarge 15 | Aurora,PostgreSQL,db.r5.12xlarge,db.r6g.12xlarge 16 | Aurora,PostgreSQL,db.r5.16xlarge,db.r6g.16xlarge 17 | Aurora,PostgreSQL,db.r5.24xlarge,None 18 | Aurora,MySQL,db.t2.small,None 19 | Aurora,MySQL,db.t2.medium,db.t4g.medium 20 | Aurora,MySQL,db.t3.small,None 21 | Aurora,MySQL,db.t3.medium,db.t4g.medium 22 | Aurora,MySQL,db.t3.large,db.t4g.large 23 | Aurora,MySQL,db.r3.large,db.r6g.large 24 | Aurora,MySQL,db.r3.xlarge,db.r6g.xlarge 25 | Aurora,MySQL,db.r3.2xlarge,db.r6g.2xlarge 26 | Aurora,MySQL,db.r3.4xlarge,db.r6g.4xlarge 27 | Aurora,MySQL,db.r3.8xlarge,db.r6g.8xlarge 28 | Aurora,MySQL,db.r4.large,db.r6g.large 29 | Aurora,MySQL,db.r4.xlarge,db.r6g.xlarge 30 | Aurora,MySQL,db.r4.2xlarge,db.r6g.2xlarge 31 | Aurora,MySQL,db.r4.4xlarge,db.r6g.4xlarge 32 | Aurora,MySQL,db.r4.8xlarge,db.r6g.8xlarge 33 | Aurora,MySQL,db.r4.16xlarge,db.r6g.16xlarge 34 | Aurora,MySQL,db.r5.large,db.r6g.large 35 | Aurora,MySQL,db.r5.xlarge,db.r6g.xlarge 36 | Aurora,MySQL,db.r5.2xlarge,db.r6g.2xlarge 37 | Aurora,MySQL,db.r5.4xlarge,db.r6g.4xlarge 38 | Aurora,MySQL,db.r5.8xlarge,db.r6g.8xlarge 39 | Aurora,MySQL,db.r5.12xlarge,db.r6g.12xlarge 40 | Aurora,MySQL,db.r5.16xlarge,db.r6g.16xlarge 41 | Aurora,MySQL,db.r5.24xlarge,None 42 | AmazonRDS,PostgreSQL,db.t2.micro,db.t4g.micro 43 | AmazonRDS,PostgreSQL,db.t2.small,db.t4g.small 44 | AmazonRDS,PostgreSQL,db.t2.medium,db.t4g.medium 45 | AmazonRDS,PostgreSQL,db.t2.large,db.t4g.large 46 | AmazonRDS,PostgreSQL,db.t2.xlarge,db.t4g.xlarge 47 | AmazonRDS,PostgreSQL,db.t2.2xlarge,db.t4g.2xlarge 48 | AmazonRDS,PostgreSQL,db.t3.micro,db.t4g.micro 49 | AmazonRDS,PostgreSQL,db.t3.small,db.t4g.small 50 | AmazonRDS,PostgreSQL,db.t3.medium,db.t4g.medium 51 | AmazonRDS,PostgreSQL,db.t3.large,db.t4g.large 52 | AmazonRDS,PostgreSQL,db.t3.xlarge,db.t4g.xlarge 53 | AmazonRDS,PostgreSQL,db.t3.2xlarge,db.t4g.2xlarge 54 | AmazonRDS,PostgreSQL,db.m3.medium,None 55 | AmazonRDS,PostgreSQL,db.m3.large,db.m6g.large 56 | 
AmazonRDS,PostgreSQL,db.m3.xlarge,db.m6g.xlarge 57 | AmazonRDS,PostgreSQL,db.m3.2xlarge,db.m6g.2xlarge 58 | AmazonRDS,PostgreSQL,db.m3.4xlarge,db.m6g.4xlarge 59 | AmazonRDS,PostgreSQL,db.m3.10xlarge,db.m6g.10xlarge 60 | AmazonRDS,PostgreSQL,db.m3.16xlarge,db.m6g.16xlarge 61 | AmazonRDS,PostgreSQL,db.m4.large,db.m6g.large 62 | AmazonRDS,PostgreSQL,db.m4.xlarge,db.m6g.xlarge 63 | AmazonRDS,PostgreSQL,db.m4.2xlarge,db.m6g.2xlarge 64 | AmazonRDS,PostgreSQL,db.m4.4xlarge,db.m6g.4xlarge 65 | AmazonRDS,PostgreSQL,db.m4.10xlarge,db.m6g.10xlarge 66 | AmazonRDS,PostgreSQL,db.m4.16xlarge,db.m6g.16xlarge 67 | AmazonRDS,PostgreSQL,db.m5.large,db.m6g.large 68 | AmazonRDS,PostgreSQL,db.m5.xlarge,db.m6g.xlarge 69 | AmazonRDS,PostgreSQL,db.m5.2xlarge,db.m6g.2xlarge 70 | AmazonRDS,PostgreSQL,db.m5.4xlarge,db.m6g.4xlarge 71 | AmazonRDS,PostgreSQL,db.m5.8xlarge,db.m6g.8xlarge 72 | AmazonRDS,PostgreSQL,db.m5.12xlarge,db.m6g.12xlarge 73 | AmazonRDS,PostgreSQL,db.m5.16xlarge,db.m6g.16xlarge 74 | AmazonRDS,PostgreSQL,db.m5.24xlarge,None 75 | AmazonRDS,PostgreSQL,db.r3.large,db.r6g.large 76 | AmazonRDS,PostgreSQL,db.r3.xlarge,db.r6g.xlarge 77 | AmazonRDS,PostgreSQL,db.r3.2xlarge,db.r6g.2xlarge 78 | AmazonRDS,PostgreSQL,db.r3.4xlarge,db.r6g.4xlarge 79 | AmazonRDS,PostgreSQL,db.r3.8xlarge,db.r6g.8xlarge 80 | AmazonRDS,PostgreSQL,db.r4.large,db.r6g.large 81 | AmazonRDS,PostgreSQL,db.r4.xlarge,db.r6g.xlarge 82 | AmazonRDS,PostgreSQL,db.r4.2xlarge,db.r6g.2xlarge 83 | AmazonRDS,PostgreSQL,db.r4.4xlarge,db.r6g.4xlarge 84 | AmazonRDS,PostgreSQL,db.r4.8xlarge,db.r6g.8xlarge 85 | AmazonRDS,PostgreSQL,db.r4.16xlarge,db.r6g.16xlarge 86 | AmazonRDS,PostgreSQL,db.r5.large,db.r6g.large 87 | AmazonRDS,PostgreSQL,db.r5.xlarge,db.r6g.xlarge 88 | AmazonRDS,PostgreSQL,db.r5.2xlarge,db.r6g.2xlarge 89 | AmazonRDS,PostgreSQL,db.r5.4xlarge,db.r6g.4xlarge 90 | AmazonRDS,PostgreSQL,db.r5.8xlarge,db.r6g.8xlarge 91 | AmazonRDS,PostgreSQL,db.r5.12xlarge,db.r6g.12xlarge 92 | AmazonRDS,PostgreSQL,db.r5.16xlarge,db.r6g.16xlarge 93 | AmazonRDS,PostgreSQL,db.r5.24xlarge,None 94 | AmazonRDS,PostgreSQL,db.r5b.large,db.r6gd.large 95 | AmazonRDS,PostgreSQL,db.r5b.xlarge,db.r6gd.xlarge 96 | AmazonRDS,PostgreSQL,db.r5b.2xlarge,db.r6gd.2xlarge 97 | AmazonRDS,PostgreSQL,db.r5b.4xlarge,db.r6gd.4xlarge 98 | AmazonRDS,PostgreSQL,db.r5b.8xlarge,db.r6gd.8xlarge 99 | AmazonRDS,PostgreSQL,db.r5b.12xlarge,db.r6gd.12xlarge 100 | AmazonRDS,PostgreSQL,db.r5b.16xlarge,db.r6gd.16xlarge 101 | AmazonRDS,PostgreSQL,db.r5b.24xlarge,None 102 | AmazonRDS,MySQL,db.t2.micro,db.t4g.micro 103 | AmazonRDS,MySQL,db.t2.small,db.t4g.small 104 | AmazonRDS,MySQL,db.t2.medium,db.t4g.medium 105 | AmazonRDS,MySQL,db.t2.large,db.t4g.large 106 | AmazonRDS,MySQL,db.t2.xlarge,db.t4g.xlarge 107 | AmazonRDS,MySQL,db.t2.2xlarge,db.t4g.2xlarge 108 | AmazonRDS,MySQL,db.t3.micro,db.t4g.micro 109 | AmazonRDS,MySQL,db.t3.small,db.t4g.small 110 | AmazonRDS,MySQL,db.t3.medium,db.t4g.medium 111 | AmazonRDS,MySQL,db.t3.large,db.t4g.large 112 | AmazonRDS,MySQL,db.t3.xlarge,db.t4g.xlarge 113 | AmazonRDS,MySQL,db.t3.2xlarge,db.t4g.2xlarge 114 | AmazonRDS,MySQL,db.m3.medium,None 115 | AmazonRDS,MySQL,db.m3.large,db.m6g.large 116 | AmazonRDS,MySQL,db.m3.xlarge,db.m6g.xlarge 117 | AmazonRDS,MySQL,db.m3.2xlarge,db.m6g.2xlarge 118 | AmazonRDS,MySQL,db.m3.4xlarge,db.m6g.4xlarge 119 | AmazonRDS,MySQL,db.m3.10xlarge,db.m6g.10xlarge 120 | AmazonRDS,MySQL,db.m3.16xlarge,db.m6g.16xlarge 121 | AmazonRDS,MySQL,db.m4.large,db.m6g.large 122 | AmazonRDS,MySQL,db.m4.xlarge,db.m6g.xlarge 123 | 
AmazonRDS,MySQL,db.m4.2xlarge,db.m6g.2xlarge 124 | AmazonRDS,MySQL,db.m4.4xlarge,db.m6g.4xlarge 125 | AmazonRDS,MySQL,db.m4.10xlarge,db.m6g.10xlarge 126 | AmazonRDS,MySQL,db.m4.16xlarge,db.m6g.16xlarge 127 | AmazonRDS,MySQL,db.m5.large,db.m6g.large 128 | AmazonRDS,MySQL,db.m5.xlarge,db.m6g.xlarge 129 | AmazonRDS,MySQL,db.m5.2xlarge,db.m6g.2xlarge 130 | AmazonRDS,MySQL,db.m5.4xlarge,db.m6g.4xlarge 131 | AmazonRDS,MySQL,db.m5.8xlarge,db.m6g.8xlarge 132 | AmazonRDS,MySQL,db.m5.12xlarge,db.m6g.12xlarge 133 | AmazonRDS,MySQL,db.m5.16xlarge,db.m6g.16xlarge 134 | AmazonRDS,MySQL,db.m5.24xlarge,None 135 | AmazonRDS,MySQL,db.r3.large,db.r6g.large 136 | AmazonRDS,MySQL,db.r3.xlarge,db.r6g.xlarge 137 | AmazonRDS,MySQL,db.r3.2xlarge,db.r6g.2xlarge 138 | AmazonRDS,MySQL,db.r3.4xlarge,db.r6g.4xlarge 139 | AmazonRDS,MySQL,db.r3.8xlarge,db.r6g.8xlarge 140 | AmazonRDS,MySQL,db.r4.large,db.r6g.large 141 | AmazonRDS,MySQL,db.r4.xlarge,db.r6g.xlarge 142 | AmazonRDS,MySQL,db.r4.2xlarge,db.r6g.2xlarge 143 | AmazonRDS,MySQL,db.r4.4xlarge,db.r6g.4xlarge 144 | AmazonRDS,MySQL,db.r4.8xlarge,db.r6g.8xlarge 145 | AmazonRDS,MySQL,db.r4.16xlarge,db.r6g.16xlarge 146 | AmazonRDS,MySQL,db.r5.large,db.r6g.large 147 | AmazonRDS,MySQL,db.r5.xlarge,db.r6g.xlarge 148 | AmazonRDS,MySQL,db.r5.2xlarge,db.r6g.2xlarge 149 | AmazonRDS,MySQL,db.r5.4xlarge,db.r6g.4xlarge 150 | AmazonRDS,MySQL,db.r5.8xlarge,db.r6g.8xlarge 151 | AmazonRDS,MySQL,db.r5.12xlarge,db.r6g.12xlarge 152 | AmazonRDS,MySQL,db.r5.16xlarge,db.r6g.16xlarge 153 | AmazonRDS,MySQL,db.r5.24xlarge,None 154 | AmazonRDS,MySQL,db.r5b.large,db.r6gd.large 155 | AmazonRDS,MySQL,db.r5b.xlarge,db.r6gd.xlarge 156 | AmazonRDS,MySQL,db.r5b.2xlarge,db.r6gd.2xlarge 157 | AmazonRDS,MySQL,db.r5b.4xlarge,db.r6gd.4xlarge 158 | AmazonRDS,MySQL,db.r5b.8xlarge,db.r6gd.8xlarge 159 | AmazonRDS,MySQL,db.r5b.12xlarge,db.r6gd.12xlarge 160 | AmazonRDS,MySQL,db.r5b.16xlarge,db.r6gd.16xlarge 161 | AmazonRDS,MySQL,db.r5b.24xlarge,None 162 | AmazonRDS,MariaDB,db.t2.micro,db.t4g.micro 163 | AmazonRDS,MariaDB,db.t2.small,db.t4g.small 164 | AmazonRDS,MariaDB,db.t2.medium,db.t4g.medium 165 | AmazonRDS,MariaDB,db.t2.large,db.t4g.large 166 | AmazonRDS,MariaDB,db.t2.xlarge,db.t4g.xlarge 167 | AmazonRDS,MariaDB,db.t2.2xlarge,db.t4g.2xlarge 168 | AmazonRDS,MariaDB,db.t3.micro,db.t4g.micro 169 | AmazonRDS,MariaDB,db.t3.small,db.t4g.small 170 | AmazonRDS,MariaDB,db.t3.medium,db.t4g.medium 171 | AmazonRDS,MariaDB,db.t3.large,db.t4g.large 172 | AmazonRDS,MariaDB,db.t3.xlarge,db.t4g.xlarge 173 | AmazonRDS,MariaDB,db.t3.2xlarge,db.t4g.2xlarge 174 | AmazonRDS,MariaDB,db.m3.medium,None 175 | AmazonRDS,MariaDB,db.m3.large,db.m6g.large 176 | AmazonRDS,MariaDB,db.m3.xlarge,db.m6g.xlarge 177 | AmazonRDS,MariaDB,db.m3.2xlarge,db.m6g.2xlarge 178 | AmazonRDS,MariaDB,db.m3.4xlarge,db.m6g.4xlarge 179 | AmazonRDS,MariaDB,db.m3.10xlarge,db.m6g.10xlarge 180 | AmazonRDS,MariaDB,db.m3.16xlarge,db.m6g.16xlarge 181 | AmazonRDS,MariaDB,db.m4.large,db.m6g.large 182 | AmazonRDS,MariaDB,db.m4.xlarge,db.m6g.xlarge 183 | AmazonRDS,MariaDB,db.m4.2xlarge,db.m6g.2xlarge 184 | AmazonRDS,MariaDB,db.m4.4xlarge,db.m6g.4xlarge 185 | AmazonRDS,MariaDB,db.m4.10xlarge,db.m6g.10xlarge 186 | AmazonRDS,MariaDB,db.m4.16xlarge,db.m6g.16xlarge 187 | AmazonRDS,MariaDB,db.m5.large,db.m6g.large 188 | AmazonRDS,MariaDB,db.m5.xlarge,db.m6g.xlarge 189 | AmazonRDS,MariaDB,db.m5.2xlarge,db.m6g.2xlarge 190 | AmazonRDS,MariaDB,db.m5.4xlarge,db.m6g.4xlarge 191 | AmazonRDS,MariaDB,db.m5.8xlarge,db.m6g.8xlarge 192 | AmazonRDS,MariaDB,db.m5.12xlarge,db.m6g.12xlarge 193 
| AmazonRDS,MariaDB,db.m5.16xlarge,db.m6g.16xlarge 194 | AmazonRDS,MariaDB,db.m5.24xlarge,None 195 | AmazonRDS,MariaDB,db.r3.large,db.r6g.large 196 | AmazonRDS,MariaDB,db.r3.xlarge,db.r6g.xlarge 197 | AmazonRDS,MariaDB,db.r3.2xlarge,db.r6g.2xlarge 198 | AmazonRDS,MariaDB,db.r3.4xlarge,db.r6g.4xlarge 199 | AmazonRDS,MariaDB,db.r3.8xlarge,db.r6g.8xlarge 200 | AmazonRDS,MariaDB,db.r4.large,db.r6g.large 201 | AmazonRDS,MariaDB,db.r4.xlarge,db.r6g.xlarge 202 | AmazonRDS,MariaDB,db.r4.2xlarge,db.r6g.2xlarge 203 | AmazonRDS,MariaDB,db.r4.4xlarge,db.r6g.4xlarge 204 | AmazonRDS,MariaDB,db.r4.8xlarge,db.r6g.8xlarge 205 | AmazonRDS,MariaDB,db.r4.16xlarge,db.r6g.16xlarge 206 | AmazonRDS,MariaDB,db.r5.large,db.r6g.large 207 | AmazonRDS,MariaDB,db.r5.xlarge,db.r6g.xlarge 208 | AmazonRDS,MariaDB,db.r5.2xlarge,db.r6g.2xlarge 209 | AmazonRDS,MariaDB,db.r5.4xlarge,db.r6g.4xlarge 210 | AmazonRDS,MariaDB,db.r5.8xlarge,db.r6g.8xlarge 211 | AmazonRDS,MariaDB,db.r5.12xlarge,db.r6g.12xlarge 212 | AmazonRDS,MariaDB,db.r5.16xlarge,db.r6g.16xlarge 213 | AmazonRDS,MariaDB,db.r5.24xlarge,None -------------------------------------------------------------------------------- /data-collection/deploy/deploy-data-read-permissions.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/awslabs/cid-data-collection-framework/blob/main/data-collection/v3.11.0/deploy/deploy-data-read-permissions.yaml 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: CID Data Collection - All-in-One for Management Account v3.11.0 - AWS Solution SO9011 4 | Metadata: 5 | AWS::CloudFormation::Interface: 6 | ParameterGroups: 7 | - Label: 8 | default: "Deployment parameters" 9 | Parameters: 10 | - DataCollectionAccountID 11 | - ResourcePrefix 12 | - ManagementAccountRole 13 | - MultiAccountRoleName 14 | - OrganizationalUnitIds 15 | - AllowModuleReadInMgmt 16 | - CFNSourceBucket 17 | - Label: 18 | default: "Available modules" 19 | Parameters: 20 | - IncludeBackupModule 21 | - IncludeBudgetsModule 22 | - IncludeComputeOptimizerModule 23 | - IncludeCostAnomalyModule 24 | - IncludeSupportCasesModule 25 | - IncludeECSChargebackModule 26 | - IncludeHealthEventsModule 27 | - IncludeInventoryCollectorModule 28 | - IncludeRDSUtilizationModule 29 | - IncludeEUCUtilizationModule 30 | - IncludeRightsizingModule 31 | - IncludeTAModule 32 | - IncludeTransitGatewayModule 33 | - IncludeLicenseManagerModule 34 | - IncludeServiceQuotasModule 35 | ParameterLabels: 36 | ManagementAccountRole: 37 | default: "Management account role" 38 | DataCollectionAccountID: 39 | default: "Data Collection Account Id" 40 | MultiAccountRoleName: 41 | default: "Multi Account Role Name" 42 | OrganizationalUnitIds: 43 | default: "Comma Delimited list of Organizational Unit IDs. StackSets will deploy a read role in all AWS Accounts within those OUs. See your OU ID here: https://console.aws.amazon.com/organizations/v2/home/accounts (we recommend choosing OU ID of your Root)" 44 | AllowModuleReadInMgmt: 45 | default: "Allow creation of read roles for modules in management account" 46 | ResourcePrefix: 47 | default: "Role Prefix" 48 | CFNSourceBucket: 49 | default: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. 
Must be always 'aws-managed-cost-intelligence-dashboards'" 50 | IncludeBudgetsModule: 51 | default: "Include AWS Budgets Collection Module" 52 | IncludeComputeOptimizerModule: 53 | default: "Include AWS Compute Optimizer Data Collection Module" 54 | IncludeCostAnomalyModule: 55 | default: "Include Cost Anomalies Data Collection Module" 56 | IncludeSupportCasesModule: 57 | default: "Include Support Cases Data Collection Module" 58 | IncludeECSChargebackModule: 59 | default: "Include ECS Chargeback Data Collection Module" 60 | IncludeInventoryCollectorModule: 61 | default: "Include Inventory Collector Module" 62 | IncludeRDSUtilizationModule: 63 | default: "Include RDS Utilization Data Collection Module" 64 | IncludeEUCUtilizationModule: 65 | default: "Include WorkSpaces Utilization Data Collection Module" 66 | IncludeRightsizingModule: 67 | default: "Include Rightsizing Recommendations Data Collection Module" 68 | IncludeTAModule: 69 | default: "Include AWS Trusted Advisor Data Collection Module" 70 | IncludeTransitGatewayModule: 71 | default: "Include AWS TransitGateway Collection Module" 72 | IncludeBackupModule: 73 | default: "Include AWS Backup Collection Module" 74 | IncludeHealthEventsModule: 75 | default: "Include AWS Health Events Module" 76 | IncludeLicenseManagerModule: 77 | default: "Include Marketplace Licensing Module" 78 | IncludeServiceQuotasModule: 79 | default: "Include Service Quotas Module" 80 | Parameters: 81 | ManagementAccountRole: 82 | Type: String 83 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 84 | Default: Lambda-Assume-Role-Management-Account 85 | DataCollectionAccountID: 86 | Type: String 87 | Description: AccountId of where the collector is deployed 88 | MultiAccountRoleName: 89 | Type: String 90 | Description: The name of the IAM role that will be deployed from the management account to linked accounts as a read only role. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 91 | Default: "Optimization-Data-Multi-Account-Role" 92 | OrganizationalUnitIds: 93 | Type: String 94 | AllowedPattern: ^(ou-[a-z0-9]{4,32}-[a-z0-9]{8,32}|r-[a-z0-9]{4,32})(,(ou-[a-z0-9]{4,32}-[a-z0-9]{8,32}|r-[a-z0-9]{4,32}))*$ 95 | Description: "(Ex: r-ab01,ou-ab01-abcd1234) List of Organizational Unit IDs you wish to collect data for. It can be a single organizational unit. The organization root ID is usually preferred to collect data from all the member accounts." 96 | AllowModuleReadInMgmt: 97 | Type: String 98 | Description: Allows the creation of the read data roles for modules in the management account 99 | AllowedValues: 100 | - "yes" 101 | - "no" 102 | ResourcePrefix: 103 | Type: String 104 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable e.g. prefix- 105 | Default: "CID-DC-" 106 | CFNSourceBucket: 107 | Type: String 108 | Description: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. 
Must be always 'aws-managed-cost-intelligence-dashboards'" 109 | Default: "aws-managed-cost-intelligence-dashboards" 110 | IncludeBudgetsModule: 111 | Type: String 112 | Description: Collects budgets from your accounts 113 | AllowedValues: ['yes', 'no'] 114 | Default: 'no' 115 | IncludeComputeOptimizerModule: 116 | Type: String 117 | Description: Collects AWS Compute Optimizer service recommendations 118 | AllowedValues: ['yes', 'no'] 119 | Default: 'no' 120 | IncludeCostAnomalyModule: 121 | Type: String 122 | Description: "Collects AWS Cost Explorer Cost Anomalies Recommendations" 123 | AllowedValues: ['yes', 'no'] 124 | Default: 'no' 125 | IncludeSupportCasesModule: 126 | Type: String 127 | Description: "Collects AWS Support Cases data" 128 | AllowedValues: ['yes', 'no'] 129 | Default: 'no' 130 | IncludeECSChargebackModule: 131 | Type: String 132 | Description: Collects data which shows costs associated with ECS Tasks leveraging EC2 instances within a Cluster 133 | AllowedValues: ['yes', 'no'] 134 | Default: 'no' 135 | IncludeInventoryCollectorModule: 136 | Type: String 137 | Description: Collects data about AMIs, EBS volumes and snapshots 138 | AllowedValues: ['yes', 'no'] 139 | Default: 'no' 140 | IncludeRDSUtilizationModule: 141 | Type: String 142 | Description: Collects RDS CloudWatch metrics from your accounts 143 | AllowedValues: ['yes', 'no'] 144 | Default: 'no' 145 | IncludeEUCUtilizationModule: 146 | Type: String 147 | Description: Collects WorkSpaces CloudWatch metrics from your accounts 148 | AllowedValues: ['yes', 'no'] 149 | Default: 'no' 150 | IncludeRightsizingModule: 151 | Type: String 152 | Description: "Collects AWS Cost Explorer Rightsizing Recommendations" 153 | AllowedValues: ['yes', 'no'] 154 | Default: 'no' 155 | IncludeTAModule: 156 | Type: String 157 | Description: Collects AWS Trusted Advisor recommendations data 158 | AllowedValues: ['yes', 'no'] 159 | Default: 'no' 160 | IncludeTransitGatewayModule: 161 | Type: String 162 | Description: Collects TransitGateway from your accounts 163 | AllowedValues: ['yes', 'no'] 164 | Default: 'no' 165 | IncludeBackupModule: 166 | Type: String 167 | Description: Collects AWS Backup events from your accounts 168 | AllowedValues: ['yes', 'no'] 169 | Default: 'no' 170 | IncludeHealthEventsModule: 171 | Type: String 172 | Description: Collects AWS Health Events from your accounts 173 | AllowedValues: ['yes', 'no'] 174 | Default: 'no' 175 | IncludeLicenseManagerModule: 176 | Type: String 177 | Description: Collects Marketplace Licensing information 178 | AllowedValues: ['yes', 'no'] 179 | Default: 'no' 180 | IncludeServiceQuotasModule: 181 | Type: String 182 | Description: Collects Service Quotas information 183 | AllowedValues: ['yes', 'no'] 184 | Default: 'no' 185 | 186 | Conditions: 187 | DeployModuleReadInMgmt: !Equals [!Ref AllowModuleReadInMgmt, "yes"] 188 | 189 | Resources: 190 | DataCollectorMgmtAccountReadStack: 191 | Type: AWS::CloudFormation::Stack 192 | Properties: 193 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-management-account.yaml" 194 | Parameters: 195 | DataCollectionAccountID: !Ref DataCollectionAccountID 196 | ManagementAccountRole: !Ref ManagementAccountRole 197 | ResourcePrefix: !Ref ResourcePrefix 198 | IncludeComputeOptimizerModule: !Ref IncludeComputeOptimizerModule 199 | IncludeCostAnomalyModule: !Ref IncludeCostAnomalyModule 200 | IncludeRightsizingModule: !Ref IncludeRightsizingModule 201 | IncludeBackupModule: !Ref IncludeBackupModule 202 
| IncludeHealthEventsModule: !Ref IncludeHealthEventsModule 203 | IncludeLicenseManagerModule: !Ref IncludeLicenseManagerModule 204 | IncludeServiceQuotasModule: !Ref IncludeServiceQuotasModule 205 | DataCollectorMgmtAccountModulesReadStack: 206 | Type: AWS::CloudFormation::Stack 207 | Condition: DeployModuleReadInMgmt 208 | Properties: 209 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml" 210 | Parameters: 211 | DataCollectionAccountID: !Ref DataCollectionAccountID 212 | MultiAccountRoleName: !Ref MultiAccountRoleName 213 | ResourcePrefix: !Ref ResourcePrefix 214 | IncludeTAModule: !Ref IncludeTAModule 215 | IncludeSupportCasesModule: !Ref IncludeSupportCasesModule 216 | IncludeInventoryCollectorModule: !Ref IncludeInventoryCollectorModule 217 | IncludeECSChargebackModule: !Ref IncludeECSChargebackModule 218 | IncludeRDSUtilizationModule: !Ref IncludeRDSUtilizationModule 219 | IncludeEUCUtilizationModule: !Ref IncludeEUCUtilizationModule 220 | IncludeBudgetsModule: !Ref IncludeBudgetsModule 221 | IncludeTransitGatewayModule: !Ref IncludeTransitGatewayModule 222 | IncludeServiceQuotasModule: !Ref IncludeServiceQuotasModule 223 | DataCollectorOrgAccountModulesReadStackSet: 224 | Type: AWS::CloudFormation::StackSet 225 | Properties: 226 | Description: "StackSet in charge of deploying read roles across organization accounts v3.11.0" 227 | PermissionModel: SERVICE_MANAGED 228 | AutoDeployment: 229 | Enabled: true 230 | RetainStacksOnAccountRemoval: false 231 | ManagedExecution: 232 | Active: true 233 | OperationPreferences: 234 | MaxConcurrentPercentage: 100 235 | FailureTolerancePercentage: 100 236 | RegionConcurrencyType: PARALLEL 237 | Parameters: 238 | - ParameterKey: DataCollectionAccountID 239 | ParameterValue: !Ref DataCollectionAccountID 240 | - ParameterKey: MultiAccountRoleName 241 | ParameterValue: !Ref MultiAccountRoleName 242 | - ParameterKey: ResourcePrefix 243 | ParameterValue: !Ref ResourcePrefix 244 | - ParameterKey: IncludeTAModule 245 | ParameterValue: !Ref IncludeTAModule 246 | - ParameterKey: IncludeSupportCasesModule 247 | ParameterValue: !Ref IncludeSupportCasesModule 248 | - ParameterKey: IncludeInventoryCollectorModule 249 | ParameterValue: !Ref IncludeInventoryCollectorModule 250 | - ParameterKey: IncludeECSChargebackModule 251 | ParameterValue: !Ref IncludeECSChargebackModule 252 | - ParameterKey: IncludeRDSUtilizationModule 253 | ParameterValue: !Ref IncludeRDSUtilizationModule 254 | - ParameterKey: IncludeEUCUtilizationModule 255 | ParameterValue: !Ref IncludeEUCUtilizationModule 256 | - ParameterKey: IncludeBudgetsModule 257 | ParameterValue: !Ref IncludeBudgetsModule 258 | - ParameterKey: IncludeTransitGatewayModule 259 | ParameterValue: !Ref IncludeTransitGatewayModule 260 | - ParameterKey: IncludeServiceQuotasModule 261 | ParameterValue: !Ref IncludeServiceQuotasModule 262 | StackInstancesGroup: 263 | - DeploymentTargets: 264 | OrganizationalUnitIds: !Split [",", !Ref OrganizationalUnitIds] 265 | Regions: 266 | - !Ref "AWS::Region" 267 | Capabilities: 268 | - CAPABILITY_IAM 269 | - CAPABILITY_NAMED_IAM 270 | StackSetName: !Sub "StackSet-${AWS::AccountId}-OptimizationDataRole" 271 | TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml" 272 | -------------------------------------------------------------------------------- /data-collection/deploy/module-budgets.yaml: 
-------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Retrieves Budgets data 3 | Parameters: 4 | DatabaseName: 5 | Type: String 6 | Description: Name of the Athena database to be created to hold lambda information 7 | Default: optimization_data 8 | DestinationBucket: 9 | Type: String 10 | Description: Name of the S3 Bucket to be created to hold data information 11 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 12 | DestinationBucketARN: 13 | Type: String 14 | Description: ARN of the S3 Bucket that exists or needs to be created to hold rightsizing information 15 | MultiAccountRoleName: 16 | Type: String 17 | Description: Name of the IAM role deployed in all accounts which can retrieve AWS Data. 18 | CFDataName: 19 | Type: String 20 | Description: The name of what this cf is doing. 21 | Default: budgets 22 | GlueRoleARN: 23 | Type: String 24 | Description: Arn for the Glue Crawler role 25 | Schedule: 26 | Type: String 27 | Description: EventBridge Schedule to trigger the data collection 28 | Default: "rate(1 day)" 29 | ResourcePrefix: 30 | Type: String 31 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 32 | LambdaAnalyticsARN: 33 | Type: String 34 | Description: Arn of lambda for Analytics 35 | AccountCollectorLambdaARN: 36 | Type: String 37 | Description: Arn of the Account Collector Lambda 38 | CodeBucket: 39 | Type: String 40 | Description: Source code bucket 41 | StepFunctionTemplate: 42 | Type: String 43 | Description: S3 key to the JSON template for the StepFunction 44 | StepFunctionExecutionRoleARN: 45 | Type: String 46 | Description: Common role for Step Function execution 47 | SchedulerExecutionRoleARN: 48 | Type: String 49 | Description: Common role for module Scheduler execution 50 | DataBucketsKmsKeysArns: 51 | Type: String 52 | Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." 
53 | Default: "" 54 | 55 | Outputs: 56 | StepFunctionARN: 57 | Description: ARN for the module's Step Function 58 | Value: !GetAtt ModuleStepFunction.Arn 59 | Conditions: 60 | NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] 61 | Resources: 62 | LambdaRole: 63 | Type: AWS::IAM::Role 64 | Properties: 65 | RoleName: !Sub "${ResourcePrefix}${CFDataName}-LambdaRole" 66 | AssumeRolePolicyDocument: 67 | Statement: 68 | - Action: 69 | - sts:AssumeRole 70 | Effect: Allow 71 | Principal: 72 | Service: 73 | - !Sub "lambda.${AWS::URLSuffix}" 74 | Version: 2012-10-17 75 | ManagedPolicyArns: 76 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 77 | Path: / 78 | Policies: 79 | - PolicyName: !Sub "${CFDataName}-MultiAccount-LambdaRole" 80 | PolicyDocument: 81 | Version: "2012-10-17" 82 | Statement: 83 | - Effect: "Allow" 84 | Action: "sts:AssumeRole" 85 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${MultiAccountRoleName}" 86 | - PolicyName: "S3Access" 87 | PolicyDocument: 88 | Version: "2012-10-17" 89 | Statement: 90 | - Effect: "Allow" 91 | Action: 92 | - "s3:PutObject" 93 | Resource: 94 | - !Sub "${DestinationBucketARN}/*" 95 | - !If 96 | - NeedDataBucketsKms 97 | - PolicyName: "KMS" 98 | PolicyDocument: 99 | Version: "2012-10-17" 100 | Statement: 101 | - Effect: "Allow" 102 | Action: 103 | - "kms:GenerateDataKey" 104 | Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ] 105 | - !Ref AWS::NoValue 106 | Metadata: 107 | cfn_nag: 108 | rules_to_suppress: 109 | - id: W28 # Resource found with an explicit name, this disallows updates that require replacement of this resource 110 | reason: "Need explicit name to identify role actions" 111 | 112 | LambdaFunction: 113 | Type: AWS::Lambda::Function 114 | Properties: 115 | FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda' 116 | Description: !Sub "Lambda function to retrieve ${CFDataName}" 117 | Runtime: python3.12 118 | Architectures: [x86_64] 119 | Code: 120 | ZipFile: | 121 | #Authors: 122 | # Stephanie Gooch - initial version 123 | # Mohideen - Added Budgets tag collection module 124 | import os 125 | import json 126 | import re 127 | import logging 128 | import datetime 129 | from json import JSONEncoder 130 | 131 | import boto3 132 | 133 | BUCKET = os.environ["BUCKET_NAME"] 134 | PREFIX = os.environ["PREFIX"] 135 | ROLE_NAME = os.environ['ROLE_NAME'] 136 | TMP_FILE = "/tmp/data.json" 137 | 138 | logger = logging.getLogger(__name__) 139 | logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO)) 140 | 141 | class DateTimeEncoder(JSONEncoder): 142 | """encoder for json with time object""" 143 | def default(self, o): 144 | if isinstance(o, (datetime.date, datetime.datetime)): 145 | return o.isoformat() 146 | return None 147 | 148 | def clean_value(value): 149 | """Clean string values by replacing special characters with underscores""" 150 | if isinstance(value, str): 151 | return re.sub(r'\W+', '_', value) 152 | elif isinstance(value, list): 153 | return [clean_value(v) for v in value] 154 | return value 155 | 156 | def process_cost_filters(budget): 157 | """Process and clean cost filters in budget""" 158 | cost_filters = budget.get('CostFilters', {}) 159 | if not cost_filters: 160 | budget['CostFilters'] = {'Filter': ['None']} 161 | return 162 | 163 | cleaned_filters = { 164 | re.sub(r'\W+', '_', key): clean_value(value) 165 | for key, value in cost_filters.items() 166 | } 167 | budget['CostFilters'] = cleaned_filters 168 | 169 | def 
assume_role(account_id, service, region): 170 | partition = boto3.session.Session().get_partition_for_region(region_name=region) 171 | cred = boto3.client('sts', region_name=region).assume_role( 172 | RoleArn=f"arn:{partition}:iam::{account_id}:role/{ROLE_NAME}", 173 | RoleSessionName="data_collection" 174 | )['Credentials'] 175 | return boto3.client( 176 | service, 177 | aws_access_key_id=cred['AccessKeyId'], 178 | aws_secret_access_key=cred['SecretAccessKey'], 179 | aws_session_token=cred['SessionToken'] 180 | ) 181 | 182 | def lambda_handler(event, context): #pylint: disable=W0613 183 | logger.info(f"Event data {json.dumps(event)}") 184 | if 'account' not in event: 185 | raise ValueError( 186 | "Please do not trigger this Lambda manually." 187 | "Find the corresponding state machine in Step Functions and Trigger from there." 188 | ) 189 | collection_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 190 | aws_partition = boto3.session.Session().get_partition_for_region(boto3.session.Session().region_name) 191 | account = json.loads(event["account"]) 192 | account_id = account["account_id"] 193 | account_name = account["account_name"] 194 | payer_id = account["payer_id"] 195 | 196 | logger.info(f"Collecting data for account: {account_id}") 197 | try: 198 | budgets_client = assume_role(account_id, "budgets", "us-east-1") # must be us-east-1 199 | count = 0 200 | with open(TMP_FILE, "w", encoding='utf-8') as f: 201 | for budget in budgets_client.get_paginator("describe_budgets").paginate(AccountId=account_id).search('Budgets'): 202 | if not budget: 203 | continue 204 | budget['collection_time'] = collection_time 205 | # Fetch tags for the budget using List tag for resource API 206 | budget_name = budget['BudgetName'] 207 | budget_tags = budgets_client.list_tags_for_resource(ResourceARN=f"arn:{aws_partition}:budgets::{account_id}:budget/{budget_name}") 208 | budget.update({ 209 | 'Account_ID': account_id, 210 | 'Account_Name': account_name, 211 | 'Tags': budget_tags.get('ResourceTags') or [] 212 | }) 213 | # Fetch CostFilters if available 214 | process_cost_filters(budget) 215 | # Add column plannedbudgetslimit as type array 216 | budget_limits = budget.pop('PlannedBudgetLimits', {}) 217 | budget['PlannedBudgetLimits_Flat'] = [ 218 | {'date': key, 'Amount': value.get('Amount'), 'Unit': value.get('Unit')} 219 | for key, value in budget_limits.items() 220 | ] 221 | f.write(json.dumps(budget, cls=DateTimeEncoder) + "\n") 222 | count += 1 223 | logger.info(f"Budgets collected: {count}") 224 | s3_upload(account_id, payer_id) 225 | except Exception as exc: #pylint: disable=broad-exception-caught 226 | if "AccessDenied" in str(exc): 227 | print(f'Failed to assume role {ROLE_NAME} in account {account_id}. Please make sure the role exists. {exc}') 228 | else: 229 | print(f'{exc}. 
Gracefully exiting from Lambda so we do not break all StepFunction Execution') 230 | return 231 | 232 | def s3_upload(account_id, payer_id): 233 | if os.path.getsize(TMP_FILE) == 0: 234 | logger.info(f"No data in file for {PREFIX}") 235 | return 236 | key = datetime.datetime.now().strftime(f"{PREFIX}/{PREFIX}-data/payer_id={payer_id}/year=%Y/month=%m/budgets-{account_id}.json") 237 | boto3.client('s3').upload_file(TMP_FILE, BUCKET, key) 238 | logger.info(f"Budget data for {account_id} stored at s3://{BUCKET}/{key}") 239 | 240 | Handler: 'index.lambda_handler' 241 | MemorySize: 2688 242 | Timeout: 300 243 | Role: !GetAtt LambdaRole.Arn 244 | Environment: 245 | Variables: 246 | BUCKET_NAME: !Ref DestinationBucket 247 | PREFIX: !Ref CFDataName 248 | ROLE_NAME: !Ref MultiAccountRoleName 249 | 250 | Metadata: 251 | cfn_nag: 252 | rules_to_suppress: 253 | - id: W89 # Lambda functions should be deployed inside a VPC 254 | reason: "No need for VPC in this case" 255 | - id: W92 # Lambda functions should define ReservedConcurrentExecutions to reserve simultaneous executions 256 | reason: "No need for simultaneous execution" 257 | 258 | LogGroup: 259 | Type: AWS::Logs::LogGroup 260 | Properties: 261 | LogGroupName: !Sub "/aws/lambda/${LambdaFunction}" 262 | RetentionInDays: 60 263 | 264 | Crawler: 265 | Type: AWS::Glue::Crawler 266 | Properties: 267 | Name: !Sub '${ResourcePrefix}${CFDataName}-Crawler' 268 | Role: !Ref GlueRoleARN 269 | DatabaseName: !Ref DatabaseName 270 | Targets: 271 | S3Targets: 272 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" 273 | 274 | ModuleStepFunction: 275 | Type: AWS::StepFunctions::StateMachine 276 | Properties: 277 | StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine' 278 | StateMachineType: STANDARD 279 | RoleArn: !Ref StepFunctionExecutionRoleARN 280 | DefinitionS3Location: 281 | Bucket: !Ref CodeBucket 282 | Key: !Ref StepFunctionTemplate 283 | DefinitionSubstitutions: 284 | AccountCollectorLambdaARN: !Ref AccountCollectorLambdaARN 285 | ModuleLambdaARN: !GetAtt LambdaFunction.Arn 286 | Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-Crawler"]' 287 | CollectionType: 'LINKED' 288 | Params: '' 289 | Module: !Ref CFDataName 290 | DeployRegion: !Ref AWS::Region 291 | Account: !Ref AWS::AccountId 292 | Prefix: !Ref ResourcePrefix 293 | Bucket: !Ref DestinationBucket 294 | 295 | ModuleRefreshSchedule: 296 | Type: 'AWS::Scheduler::Schedule' 297 | Properties: 298 | Description: !Sub 'Scheduler for the ODC ${CFDataName} module' 299 | Name: !Sub '${ResourcePrefix}${CFDataName}-RefreshSchedule' 300 | ScheduleExpression: !Ref Schedule 301 | State: ENABLED 302 | FlexibleTimeWindow: 303 | MaximumWindowInMinutes: 30 304 | Mode: 'FLEXIBLE' 305 | Target: 306 | Arn: !GetAtt ModuleStepFunction.Arn 307 | RoleArn: !Ref SchedulerExecutionRoleARN 308 | 309 | AnalyticsExecutor: 310 | Type: Custom::LambdaAnalyticsExecutor 311 | Properties: 312 | ServiceToken: !Ref LambdaAnalyticsARN 313 | Name: !Ref CFDataName 314 | 315 | AthenaQuery: 316 | Type: AWS::Athena::NamedQuery 317 | Properties: 318 | Database: !Ref DatabaseName 319 | Description: !Sub "Provides a summary view of the ${CFDataName}" 320 | Name: !Sub "${CFDataName}_view" 321 | QueryString: !Sub | 322 | CREATE OR REPLACE VIEW budgets_view AS 323 | SELECT 324 | budgetname budget_name 325 | , CAST(budgetlimit.amount AS decimal) budget_amount 326 | , CAST(calculatedspend.actualspend.amount AS decimal) actualspend 327 | , CAST(calculatedspend.forecastedspend.amount AS decimal) 
forecastedspend 328 | , timeunit 329 | , budgettype budget_type 330 | , account_id 331 | , timeperiod.start start_date 332 | , timeperiod."end" end_date 333 | , year budget_year 334 | , month budget_month 335 | FROM 336 | ${DatabaseName}.budgets_data 337 | WHERE (budgettype = 'COST') AND costfilters.filter[1] = 'None' 338 | -------------------------------------------------------------------------------- /data-collection/deploy/module-license-manager.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Retrieves AWS Marketplace License and grant information from AWS License manager from across an organization 3 | Parameters: 4 | DatabaseName: 5 | Type: String 6 | Description: Name of the Athena database to be created to hold lambda information 7 | Default: optimization_data 8 | DestinationBucket: 9 | Type: String 10 | Description: Name of the S3 Bucket that exists or needs to be created to hold backup information 11 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 12 | DestinationBucketARN: 13 | Type: String 14 | Description: ARN of the S3 Bucket that exists or needs to be created to hold backup information 15 | ManagementRoleName: 16 | Type: String 17 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 18 | CFDataName: 19 | Type: String 20 | Description: The name of what this cf is doing. 21 | Default: license-manager 22 | GrantDataPrefix: 23 | Type: String 24 | Description: Prefix for Grant data. 25 | Default: grants 26 | LicenseDataPrefix: 27 | Type: String 28 | Description: Prefix for License data. 29 | Default: licenses 30 | GlueRoleARN: 31 | Type: String 32 | Description: Arn for the Glue Crawler role 33 | Schedule: 34 | Type: String 35 | Description: EventBridge Schedule to trigger the data collection 36 | Default: "rate(14 days)" 37 | ResourcePrefix: 38 | Type: String 39 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 40 | LambdaAnalyticsARN: 41 | Type: String 42 | Description: Arn of lambda for Analytics 43 | AccountCollectorLambdaARN: 44 | Type: String 45 | Description: Arn of the Account Collector Lambda 46 | CodeBucket: 47 | Type: String 48 | Description: Source code bucket 49 | StepFunctionTemplate: 50 | Type: String 51 | Description: S3 key to the JSON template for the StepFunction 52 | StepFunctionExecutionRoleARN: 53 | Type: String 54 | Description: Common role for Step Function execution 55 | SchedulerExecutionRoleARN: 56 | Type: String 57 | Description: Common role for module Scheduler execution 58 | DataBucketsKmsKeysArns: 59 | Type: String 60 | Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." 
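        # Example value (hypothetical ARNs, comma separated, no spaces): "arn:aws:kms:us-east-1:111122223333:key/00000000-0000-0000-0000-000000000000,arn:aws:kms:eu-west-1:111122223333:key/11111111-1111-1111-1111-111111111111"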
61 | Default: "" 62 | 63 | Conditions: 64 | NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] 65 | 66 | Outputs: 67 | StepFunctionARN: 68 | Description: ARN for the module's Step Function 69 | Value: !GetAtt ModuleStepFunction.Arn 70 | 71 | Resources: 72 | LambdaRole: 73 | Type: AWS::IAM::Role 74 | Properties: 75 | RoleName: !Sub "${ResourcePrefix}${CFDataName}-LambdaRole" 76 | AssumeRolePolicyDocument: 77 | Statement: 78 | - Action: 79 | - sts:AssumeRole 80 | Effect: Allow 81 | Principal: 82 | Service: 83 | - !Sub "lambda.${AWS::URLSuffix}" 84 | Version: 2012-10-17 85 | ManagedPolicyArns: 86 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 87 | Path: / 88 | Policies: 89 | - PolicyName: !Sub "${CFDataName}-ManagementAccount-LambdaRole" 90 | PolicyDocument: 91 | Version: "2012-10-17" 92 | Statement: 93 | - Effect: "Allow" 94 | Action: "sts:AssumeRole" 95 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${ManagementRoleName}" # Need to assume a Read role in management accounts 96 | - !If 97 | - NeedDataBucketsKms 98 | - PolicyName: "KMS" 99 | PolicyDocument: 100 | Version: "2012-10-17" 101 | Statement: 102 | - Effect: "Allow" 103 | Action: 104 | - "kms:GenerateDataKey" 105 | Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ] 106 | - !Ref AWS::NoValue 107 | - PolicyName: "S3-Access" 108 | PolicyDocument: 109 | Version: "2012-10-17" 110 | Statement: 111 | - Effect: "Allow" 112 | Action: 113 | - "s3:PutObject" 114 | Resource: 115 | - !Sub "${DestinationBucketARN}/*" 116 | Metadata: 117 | cfn_nag: 118 | rules_to_suppress: 119 | - id: W28 # Resource found with an explicit name, this disallows updates that require replacement of this resource 120 | reason: "Need explicit name to identify role actions" 121 | 122 | LambdaFunction: 123 | Type: AWS::Lambda::Function 124 | Properties: 125 | FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda' 126 | Description: !Sub "Lambda function to retrieve ${CFDataName}" 127 | Runtime: python3.12 128 | Architectures: [x86_64] 129 | Code: 130 | ZipFile: | 131 | """ Collects AWS Marketplace Licensing and grant information, 132 | and uploads to S3. Creates Step functions, Glue crawler to crawl S3 bucket, 133 | creates Athena tables and view. 
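          Environment variables (set by this template's Lambda Environment block): BUCKET_NAME, ROLE_NAME, PREFIX, S3_GRANTS_PREFIX, S3_LICENSES_PREFIX; LOG_LEVEL is optional and defaults to INFO.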
134 | """ 135 | import os 136 | import json 137 | import logging 138 | from datetime import date 139 | import boto3 140 | # Initialize AWS clients 141 | 142 | s3 = boto3.client('s3') 143 | athena = boto3.client('athena') 144 | 145 | logger = logging.getLogger(__name__) 146 | logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO)) 147 | BUCKET = os.environ['BUCKET_NAME'] 148 | ROLE = os.environ['ROLE_NAME'] 149 | S3_GRANTS_PREFIX = os.environ['S3_GRANTS_PREFIX'] 150 | S3_LICENSES_PREFIX = os.environ['S3_LICENSES_PREFIX'] 151 | PREFIX = os.environ['PREFIX'] 152 | 153 | def store_data_to_s3(data, prefix, payer_id): 154 | if not data: 155 | logger.info("No data") 156 | return 157 | json_data = "\n".join(json.dumps(entity) for entity in data) 158 | key = date.today().strftime(f"{PREFIX}/{PREFIX}-{prefix}/payer_id={payer_id}/year=%Y/month=%m/day=%d/%Y-%m-%d.json") 159 | s3.put_object( 160 | Bucket=BUCKET, 161 | Key=key, 162 | Body=json_data, 163 | ContentType='application/json' 164 | ) 165 | logger.info(f'File upload successful to s3://{BUCKET}/{key}') 166 | 167 | def get_received_licenses(license_manager): 168 | licenses = [] 169 | pagination_token = '' #nosec 170 | while True: #Operation list_received_licenses cannot be paginated 171 | response = license_manager.list_received_licenses( 172 | MaxResults=100, 173 | NextToken=pagination_token 174 | ) 175 | licenses.extend(response.get('Licenses', [])) 176 | pagination_token = response.get('NextToken', '') 177 | if not pagination_token: 178 | break 179 | return licenses 180 | 181 | def process_one_management_acc(management_account_id): 182 | region = boto3.session.Session().region_name 183 | partition = boto3.session.Session().get_partition_for_region(region_name=region) 184 | logger.debug('assuming role') 185 | cred = boto3.client('sts').assume_role( 186 | RoleArn=f"arn:{partition}:iam::{management_account_id}:role/{ROLE}", 187 | RoleSessionName="data_collection" 188 | )['Credentials'] 189 | license_manager = boto3.client( 190 | 'license-manager', 191 | "us-east-1", # Must be "us-east-1" 192 | aws_access_key_id=cred['AccessKeyId'], 193 | aws_secret_access_key=cred['SecretAccessKey'], 194 | aws_session_token=cred['SessionToken'], 195 | ) 196 | process_license_information(license_manager, management_account_id) 197 | 198 | def process_license_information(license_manager, management_account_id): 199 | logger.info("Retrieving licensing information") 200 | license_grants = [] 201 | try: 202 | marketplace_licenses = [ 203 | license_ for license_ in get_received_licenses(license_manager) 204 | if license_.get('Issuer', {}).get('Name') == 'AWS/Marketplace' 205 | ] 206 | 207 | for license_ in marketplace_licenses: 208 | license_arn = license_['LicenseArn'] 209 | try: 210 | grants_for_license = license_manager.list_received_grants_for_organization(LicenseArn=license_arn)['Grants'] 211 | except license_manager.exceptions.AccessDeniedException: 212 | print( 213 | 'ERROR: AccessDenied when getting grants for ', license_arn, 214 | 'Open https://us-east-1.console.aws.amazon.com/marketplace/home#/settings and make sure ' 215 | 'the organization trust in Marketplace settings is enabled. 
' 216 | ) 217 | else: 218 | license_grants.extend(grants_for_license) 219 | 220 | # Store the licenses data to S3 221 | store_data_to_s3(marketplace_licenses, S3_LICENSES_PREFIX, management_account_id) 222 | 223 | # Store the grants data to S3 224 | store_data_to_s3(license_grants, S3_GRANTS_PREFIX, management_account_id) 225 | 226 | except Exception as exc: #pylint: disable=W0718 227 | logging.error(f"{management_account_id} : {exc}") 228 | return "Successful" 229 | 230 | def lambda_handler(event, context): #pylint: disable=W0613 231 | logger.info(f"Event data {json.dumps(event)}") 232 | if 'account' not in event: 233 | raise ValueError( 234 | "Please do not trigger this Lambda manually." 235 | "Find the corresponding state machine in Step Functions and Trigger from there." 236 | ) 237 | account = json.loads(event["account"]) 238 | try: 239 | process_one_management_acc(account["account_id"]) 240 | except Exception as exc: #pylint: disable=W0718 241 | logging.error(f"{account['account_id']} : {exc}") 242 | 243 | return "Successful" 244 | 245 | Handler: 'index.lambda_handler' 246 | MemorySize: 2688 247 | Timeout: 600 248 | Role: !GetAtt LambdaRole.Arn 249 | Environment: 250 | Variables: 251 | BUCKET_NAME: !Ref DestinationBucket 252 | S3_GRANTS_PREFIX: !Ref GrantDataPrefix 253 | S3_LICENSES_PREFIX: !Ref LicenseDataPrefix 254 | PREFIX: !Ref CFDataName 255 | ROLE_NAME: !Ref ManagementRoleName 256 | Metadata: 257 | cfn_nag: 258 | rules_to_suppress: 259 | - id: W89 # Lambda functions should be deployed inside a VPC 260 | reason: "No need for VPC in this case" 261 | - id: W92 # Lambda functions should define ReservedConcurrentExecutions to reserve simultaneous executions 262 | reason: "No need for simultaneous execution" 263 | 264 | LogGroup: 265 | Type: AWS::Logs::LogGroup 266 | Properties: 267 | LogGroupName: !Sub "/aws/lambda/${LambdaFunction}" 268 | RetentionInDays: 60 269 | 270 | GrantsCrawler: 271 | Type: AWS::Glue::Crawler 272 | Properties: 273 | Name: !Sub '${ResourcePrefix}${CFDataName}-${GrantDataPrefix}-Crawler' 274 | Role: !Ref GlueRoleARN 275 | DatabaseName: !Ref DatabaseName 276 | Targets: 277 | S3Targets: 278 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${GrantDataPrefix}/" 279 | 280 | LicensesCrawler: 281 | Type: AWS::Glue::Crawler 282 | Properties: 283 | Name: !Sub '${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler' 284 | Role: !Ref GlueRoleARN 285 | DatabaseName: !Ref DatabaseName 286 | Targets: 287 | S3Targets: 288 | - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${LicenseDataPrefix}/" 289 | 290 | ModuleStepFunction: 291 | Type: AWS::StepFunctions::StateMachine 292 | Properties: 293 | StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine' 294 | StateMachineType: STANDARD 295 | RoleArn: !Ref StepFunctionExecutionRoleARN 296 | DefinitionS3Location: 297 | Bucket: !Ref CodeBucket 298 | Key: !Ref StepFunctionTemplate 299 | DefinitionSubstitutions: 300 | AccountCollectorLambdaARN: !Ref AccountCollectorLambdaARN 301 | ModuleLambdaARN: !GetAtt LambdaFunction.Arn 302 | Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler","${ResourcePrefix}${CFDataName}-${GrantDataPrefix}-Crawler"]' 303 | CollectionType: "Payers" 304 | Params: '' 305 | Module: !Ref CFDataName 306 | DeployRegion: !Ref AWS::Region 307 | Account: !Ref AWS::AccountId 308 | Prefix: !Ref ResourcePrefix 309 | Bucket: !Ref DestinationBucket 310 | 311 | ModuleRefreshSchedule: 312 | Type: 'AWS::Scheduler::Schedule' 313 | Properties: 314 
| Description: !Sub 'Scheduler for the ODC ${CFDataName} module' 315 | Name: !Sub '${ResourcePrefix}${CFDataName}-RefreshSchedule' 316 | ScheduleExpression: !Ref Schedule 317 | State: ENABLED 318 | FlexibleTimeWindow: 319 | MaximumWindowInMinutes: 30 320 | Mode: 'FLEXIBLE' 321 | Target: 322 | Arn: !GetAtt ModuleStepFunction.Arn 323 | RoleArn: !Ref SchedulerExecutionRoleARN 324 | 325 | AnalyticsExecutor: 326 | Type: Custom::LambdaAnalyticsExecutor 327 | Properties: 328 | ServiceToken: !Ref LambdaAnalyticsARN 329 | Name: !Ref CFDataName 330 | -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/bu_usage_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW bu_usage_view AS 2 | SELECT 3 | "bill_payer_account_id" 4 | , "line_item_product_code" 5 | , "line_item_usage_account_id" 6 | , "line_item_resource_id" 7 | , "task" 8 | , "resource_tags_aws_ecs_service_Name" 9 | , "line_item_usage_type" 10 | , "line_item_operation" 11 | , "sum"(CAST("sum_line_item_usage_amount" AS double)) "sum_line_item_usage_amount" 12 | , "cur"."month" 13 | , "cur"."year" 14 | , "cluster" 15 | , "services" 16 | , "servicearn" 17 | , "account_id" 18 | , "value" 19 | FROM 20 | (( 21 | SELECT 22 | "bill_payer_account_id" 23 | , "line_item_product_code" 24 | , "line_item_usage_account_id" 25 | , "line_item_resource_id" 26 | , "split"("line_item_resource_id", '/')[2] "task" 27 | , "resource_tags_aws_ecs_service_Name" 28 | , "line_item_usage_type" 29 | , "line_item_operation" 30 | , "sum"(CAST("line_item_usage_amount" AS double)) "sum_line_item_usage_amount" 31 | , "month" 32 | , "year" 33 | FROM 34 | ${CUR} 35 | WHERE ((("line_item_operation" = 'ECSTask-EC2') AND ("line_item_product_code" IN ('AmazonECS'))) AND ("line_item_usage_type" LIKE '%GB%')) 36 | GROUP BY "bill_payer_account_id", "line_item_usage_account_id", "line_item_product_code", "line_item_operation", "line_item_resource_id", "resource_tags_aws_ecs_service_Name", "line_item_usage_type", "line_item_operation", "month", "year" 37 | ) cur 38 | LEFT JOIN ( 39 | SELECT 40 | "cluster" 41 | , "services" 42 | , "servicearn" 43 | , "value" 44 | , "year" 45 | , "month" 46 | , "account_id" 47 | FROM 48 | cluster_metadata_view 49 | ) clusters_data ON ((("clusters_data"."account_id" = "cur"."line_item_usage_account_id") AND (("clusters_data"."services" = "cur"."resource_tags_aws_ecs_service_name") AND ("clusters_data"."year" = "cur"."year"))) AND ("clusters_data"."month" = "cur"."month"))) 50 | GROUP BY "bill_payer_account_id", "line_item_usage_account_id", "line_item_product_code", "line_item_operation", "line_item_resource_id", "resource_tags_aws_ecs_service_Name", "line_item_usage_type", "line_item_operation", "cur"."month", "cur"."year", "cluster", "services", "servicearn", "value", "task", "account_id" -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/cluster_metadata_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW cluster_metadata_view AS 2 | SELECT 3 | * 4 | , "tag"."value" 5 | FROM 6 | (ecs_services_clusters_data 7 | CROSS JOIN UNNEST("tags") t (tag)) 8 | WHERE ("tag"."key" = 'BU') -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/ec2_cluster_costs_view.sql: 
-------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW ec2_cluster_costs_view AS 2 | SELECT 3 | "line_item_product_code" 4 | , "line_item_usage_account_id" 5 | , "line_item_resource_id" 6 | , "line_item_usage_type" 7 | , "sum"((CASE WHEN ("line_item_line_item_type" = 'SavingsPlanCoveredUsage') THEN "line_item_usage_amount" WHEN ("line_item_line_item_type" = 'DiscountedUsage') THEN "line_item_usage_amount" WHEN ("line_item_line_item_type" = 'Usage') THEN "line_item_usage_amount" ELSE 0 END)) "sum_line_item_usage_amount" 8 | , "sum"("line_item_unblended_cost") "unblended_cost" 9 | , "sum"((CASE WHEN ("line_item_line_item_type" = 'SavingsPlanCoveredUsage') THEN "savings_plan_savings_plan_effective_cost" WHEN ("line_item_line_item_type" = 'SavingsPlanRecurringFee') THEN ("savings_plan_total_commitment_to_date" - "savings_plan_used_commitment") WHEN ("line_item_line_item_type" = 'SavingsPlanNegation') THEN 0 WHEN ("line_item_line_item_type" = 'SavingsPlanUpfrontFee') THEN 0 WHEN ("line_item_line_item_type" = 'DiscountedUsage') THEN "reservation_effective_cost" WHEN ("line_item_line_item_type" = 'RIFee') THEN ("reservation_unused_amortized_upfront_fee_for_billing_period" + "reservation_unused_recurring_fee") ELSE "line_item_unblended_cost" END)) "sum_line_item_amortized_cost" 10 | , "month" 11 | , "year" 12 | FROM 13 | ${CUR} 14 | WHERE (((product_product_name = 'Amazon Elastic Compute Cloud') AND (("resource_tags_user_name" LIKE '%ECS%') OR ("resource_tags_user_name" LIKE '%ecs%'))) AND ((("line_item_usage_type" LIKE '%BoxUsage%') OR ("line_item_usage_type" LIKE '%Spot%')) OR (line_item_usage_type LIKE '%%EBS%%Volume%%'))) 15 | GROUP BY "resource_tags_user_name", "line_item_product_code", "line_item_usage_account_id", "line_item_resource_id", "line_item_usage_type", "month", "year" 16 | -------------------------------------------------------------------------------- /data-collection/deploy/source/ecs/Athena/ecs_chargeback_report.sql: -------------------------------------------------------------------------------- 1 | -- FINAL 2 | SELECT bu_usage_view.line_item_usage_account_id, sum(sum_line_item_usage_amount) AS task_usage, total_usage, (sum(sum_line_item_usage_amount)/total_usage) as "percent", ec2_cost, ((sum(sum_line_item_usage_amount)/total_usage)*ec2_cost) as ecs_cost, 3 | "cluster", 4 | services, 5 | servicearn, 6 | value, 7 | bu_usage_view.month, 8 | bu_usage_view.year 9 | FROM "bu_usage_view" 10 | 11 | left join (select line_item_usage_account_id, sum(sum_line_item_usage_amount) as total_usage, year, month from "bu_usage_view" where "cluster" <> '' group by line_item_usage_account_id, year, month) sum 12 | on sum.line_item_usage_account_id = bu_usage_view.line_item_usage_account_id 13 | and sum.month=bu_usage_view.month 14 | and sum.year=bu_usage_view.year 15 | left join 16 | (SELECT line_item_usage_account_id, month, year, sum(sum_line_item_amortized_cost) as ec2_cost FROM "ec2_cluster_costs_view" group by line_item_usage_account_id,month,year) ec2_cost 17 | on ec2_cost.month=bu_usage_view.month 18 | and ec2_cost.year=bu_usage_view.year 19 | and ec2_cost.line_item_usage_account_id=bu_usage_view.line_item_usage_account_id 20 | where "cluster" <> '' 21 | and bu_usage_view.month = '6' -- if((date_format(current_timestamp , '%M') = 'January'),bu_usage_view.month = '12', bu_usage_view.month = CAST((month(now())-1) AS VARCHAR) ) 22 | and bu_usage_view.year = '2021' -- if((date_format(current_timestamp , '%M') = 'January'), 
bu_usage_view.year = CAST((year(now())-1) AS VARCHAR) ,bu_usage_view.year = CAST(year(now()) AS VARCHAR)) 23 | GROUP BY "cluster", services, servicearn, value, bu_usage_view.month, bu_usage_view.year, bu_usage_view.line_item_usage_account_id, total_usage, ec2_cost -------------------------------------------------------------------------------- /data-collection/deploy/source/partition_repair_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | This utility realigns new Athena table partitions to the original type in the event that an API call 3 | returns a datatype that does not match the type from the initial crawler run. 4 | 5 | Usage: 6 | Determine the name of your database and the table you wish to alter: 7 | 8 | python3 {prog} 9 | 10 | """ 11 | import sys 12 | import logging 13 | import boto3 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | def realign_partitions(database_name, table_name): 18 | logger.info(f"Realigning partitions for {database_name}.{table_name}") 19 | 20 | glue_client = boto3.client("glue") 21 | 22 | # Get the data types of the base table 23 | table_response = glue_client.get_table( 24 | DatabaseName=database_name, 25 | Name=table_name 26 | ) 27 | 28 | column_to_datatype = { 29 | item["Name"]: item["Type"] for item in table_response["Table"]["StorageDescriptor"]["Columns"] 30 | } 31 | 32 | # List partitions and datatypes 33 | partition_params = { 34 | "DatabaseName": database_name, 35 | "TableName": table_name, 36 | } 37 | response = glue_client.get_partitions(**partition_params) 38 | partitions = response["Partitions"] 39 | 40 | while "NextToken" in response: 41 | partition_params["NextToken"] = response["NextToken"] 42 | response = glue_client.get_partitions(**partition_params) 43 | 44 | partitions += response["Partitions"] 45 | 46 | logger.debug(f"Found {len(partitions)} partitions") 47 | 48 | partitions_to_update = [] 49 | for partition in partitions: 50 | changed = False 51 | columns = partition["StorageDescriptor"]["Columns"] 52 | new_columns = [] 53 | for column in columns: 54 | if column["Name"] in column_to_datatype and column["Type"] != column_to_datatype[column["Name"]]: 55 | changed = True 56 | logger.debug(f"Changing type of {column['Name']} from {column['Type']} to {column_to_datatype[column['Name']]}") 57 | column["Type"] = column_to_datatype[column["Name"]] 58 | new_columns.append(column) 59 | partition["StorageDescriptor"]["Columns"] = new_columns 60 | if changed: 61 | partitions_to_update.append(partition) 62 | 63 | logger.debug(f"{len(partitions_to_update)} partitions of table {table_name} will be updated.") 64 | 65 | # Update partitions if necessary 66 | for partition in partitions_to_update: 67 | logger.debug(f"Updating {', '.join(partition['Values'])}") 68 | partition.pop("CatalogId") 69 | partition.pop("CreationTime") 70 | glue_client.update_partition( 71 | DatabaseName=partition.pop("DatabaseName"), 72 | TableName=partition.pop("TableName"), 73 | PartitionValueList=partition['Values'], 74 | PartitionInput=partition 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | logging.basicConfig(level=logging.ERROR) 80 | logger.setLevel(logging.DEBUG) 81 | try: 82 | database_name = sys.argv[1] 83 | table_name = sys.argv[2] 84 | except: 85 | print(__doc__.format(prog=sys.argv[0])) 86 | exit(1) 87 | realign_partitions(database_name, table_name) -------------------------------------------------------------------------------- /data-collection/deploy/source/regions.csv: 
-------------------------------------------------------------------------------- 1 | Region,Region Name,Endpoint,Protocol 2 | us-east-2,US East (Ohio),rds.us-east-2.amazonaws.com,HTTPS 3 | us-east-1,US East (N. Virginia),rds.us-east-1.amazonaws.com,HTTPS 4 | us-west-1,US West (N. California),rds.us-west-1.amazonaws.com,HTTPS 5 | us-west-2,US West (Oregon),rds.us-west-2.amazonaws.com,HTTPS 6 | af-south-1,Africa (Cape Town),rds.af-south-1.amazonaws.com,HTTPS 7 | ap-east-1,Asia Pacific (Hong Kong),rds.ap-east-1.amazonaws.com,HTTPS 8 | ap-south-1,Asia Pacific (Mumbai),rds.ap-south-1.amazonaws.com,HTTPS 9 | ap-northeast-3,Asia Pacific (Osaka-Local),rds.ap-northeast-3.amazonaws.com,HTTPS 10 | ap-northeast-2,Asia Pacific (Seoul),rds.ap-northeast-2.amazonaws.com,HTTPS 11 | ap-southeast-1,Asia Pacific (Singapore),rds.ap-southeast-1.amazonaws.com,HTTPS 12 | ap-southeast-2,Asia Pacific (Sydney),rds.ap-southeast-2.amazonaws.com,HTTPS 13 | ap-northeast-1,Asia Pacific (Tokyo),rds.ap-northeast-1.amazonaws.com,HTTPS 14 | ca-central-1,Canada (Central),rds.ca-central-1.amazonaws.com,HTTPS 15 | cn-north-1,China (Beijing),rds.cn-north-1.amazonaws.com.cn,HTTPS 16 | cn-northwest-1,China (Ningxia),rds.cn-northwest-1.amazonaws.com.cn,HTTPS 17 | eu-central-1,EU (Frankfurt),rds.eu-central-1.amazonaws.com,HTTPS 18 | eu-west-1,EU (Ireland),rds.eu-west-1.amazonaws.com,HTTPS 19 | eu-west-2,EU (London),rds.eu-west-2.amazonaws.com,HTTPS 20 | eu-south-1,EU (Milan),rds.eu-south-1.amazonaws.com,HTTPS 21 | eu-west-3,EU (Paris),rds.eu-west-3.amazonaws.com,HTTPS 22 | eu-north-1,EU (Stockholm),rds.eu-north-1.amazonaws.com,HTTPS 23 | me-south-1,Middle East (Bahrain),rds.me-south-1.amazonaws.com,HTTPS 24 | sa-east-1,South America (São Paulo),rds.sa-east-1.amazonaws.com,HTTPS 25 | us-gov-east-1,AWS GovCloud (US-East),rds.us-gov-east-1.amazonaws.com,HTTPS 26 | us-gov-west-1,AWS GovCloud (US),rds.us-gov-west-1.amazonaws.com,HTTPS -------------------------------------------------------------------------------- /data-collection/deploy/source/s3_backwards_comp.py: -------------------------------------------------------------------------------- 1 | from turtle import pd 2 | import boto3 3 | import sys 4 | import logging 5 | #python3 s3_backwards_comp.py 6 | 7 | payer_id = sys.argv[1] 8 | your_bucket_name = sys.argv[2] 9 | 10 | client = boto3.client('s3') 11 | 12 | mods = ["ecs-chargeback-data/", "rds_metrics/rds_stats/", "budgets/", "rightsizing/","optics-data-collector/ami-data/","optics-data-collector/ebs-data/", "optics-data-collector/snapshot-data/","optics-data-collector/ta-data/", "Compute_Optimizer/Compute_Optimizer_ec2_instance/", "Compute_Optimizer/Compute_Optimizer_auto_scale/", "Compute_Optimizer/Compute_Optimizer_lambda/", "Compute_Optimizer/Compute_Optimizer_ebs_volume/", "reserveinstance/", "savingsplan/", "transitgateway/"] 13 | 14 | for mod in mods: 15 | print(mod) 16 | response = client.list_objects_v2(Bucket= your_bucket_name, Prefix = mod) 17 | try: 18 | for key in response['Contents']: 19 | source_key = key["Key"] 20 | if 'payer_id' not in source_key: 21 | x = source_key.split("/")[0] 22 | source_key_new = source_key.replace(mod, '') 23 | copy_source = {'Bucket': your_bucket_name, 'Key': source_key} 24 | client.copy_object(Bucket = your_bucket_name, CopySource = copy_source, Key = f"{mod}payer_id={payer_id}/{source_key_new}") 25 | client.delete_object(Bucket = your_bucket_name, Key = source_key) 26 | else: 27 | print(f"{source_key} has payer") 28 | except Exception as e: 29 | logging.warning("%s" % e) 30 | continue 
31 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/awsfeeds-state-machine-v1.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Execute Lambda and Crawler in standalone mode", 3 | "StartAt": "Lambda Invoke", 4 | "States": { 5 | "Lambda Invoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "OutputPath": "$.Payload", 9 | "Parameters": { 10 | "FunctionName": "${ModuleLambdaARN}" 11 | }, 12 | "Retry": [ 13 | { 14 | "ErrorEquals": [ 15 | "Lambda.ServiceException", 16 | "Lambda.AWSLambdaException", 17 | "Lambda.SdkClientException", 18 | "Lambda.TooManyRequestsException" 19 | ], 20 | "IntervalSeconds": 1, 21 | "MaxAttempts": 3, 22 | "BackoffRate": 2 23 | } 24 | ], 25 | "Next": "GetCrawler1" 26 | }, 27 | "GetCrawler1": { 28 | "Type": "Task", 29 | "Parameters": { 30 | "Name": "${Crawler}" 31 | }, 32 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 33 | "Next": "Choice1", 34 | "OutputPath": "$.Crawler" 35 | }, 36 | "Choice1": { 37 | "Type": "Choice", 38 | "Choices": [ 39 | { 40 | "Not": { 41 | "Variable": "$.State", 42 | "StringEquals": "READY" 43 | }, 44 | "Next": "Wait for Crawler to be ready" 45 | } 46 | ], 47 | "Default": "StartCrawler" 48 | }, 49 | "Wait for Crawler to be ready": { 50 | "Type": "Wait", 51 | "Seconds": 15, 52 | "Next": "GetCrawler1" 53 | }, 54 | "StartCrawler": { 55 | "Type": "Task", 56 | "Parameters": { 57 | "Name": "${Crawler}" 58 | }, 59 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 60 | "Next": "Wait for Crawler Execution" 61 | }, 62 | "Wait for Crawler Execution": { 63 | "Type": "Wait", 64 | "Seconds": 15, 65 | "Next": "GetCrawler2" 66 | }, 67 | "GetCrawler2": { 68 | "Type": "Task", 69 | "Parameters": { 70 | "Name": "${Crawler}" 71 | }, 72 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 73 | "Next": "Choice2", 74 | "OutputPath": "$.Crawler" 75 | }, 76 | "Choice2": { 77 | "Type": "Choice", 78 | "Choices": [ 79 | { 80 | "Not": { 81 | "Variable": "$.State", 82 | "StringEquals": "READY" 83 | }, 84 | "Next": "Wait for Crawler Execution" 85 | } 86 | ], 87 | "Default": "Completed" 88 | }, 89 | "Completed": { 90 | "Type": "Pass", 91 | "End": true 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/crawler-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the Glue Crawlers for the collected data", 3 | "StartAt": "SetVariables", 4 | "States": { 5 | "SetVariables": { 6 | "Type": "Pass", 7 | "Next": "CrawlerMap", 8 | "QueryLanguage": "JSONata", 9 | "Output": { 10 | "crawlers": "{% $states.input.crawlers %}" 11 | }, 12 | "Assign": { 13 | "behavior": "{% $states.input.behavior %}" 14 | } 15 | }, 16 | "CrawlerMap": { 17 | "Type": "Map", 18 | "ItemProcessor": { 19 | "ProcessorConfig": { 20 | "Mode": "INLINE" 21 | }, 22 | "StartAt": "GetCrawler", 23 | "States": { 24 | "GetCrawler": { 25 | "Type": "Task", 26 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 27 | "Retry": [ 28 | { 29 | "ErrorEquals": [ 30 | "States.ALL" 31 | ], 32 | "BackoffRate": 2, 33 | "IntervalSeconds": 1, 34 | "MaxAttempts": 8, 35 | "JitterStrategy": "FULL" 36 | } 37 | ], 38 | "Next": "IsReady", 39 | "QueryLanguage": "JSONata", 40 | "Arguments": { 41 | "Name": "{% $states.input %}" 42 | }, 43 | "Output": { 44 | "Name": "{% 
$states.result.Crawler.Name %}", 45 | "State": "{% $states.result.Crawler.State %}" 46 | } 47 | }, 48 | "IsReady": { 49 | "Type": "Choice", 50 | "Default": "WaitForCrawler", 51 | "Choices": [ 52 | { 53 | "Next": "StartCrawler", 54 | "Condition": "{% $states.input.State = 'READY' %}", 55 | "Output": { 56 | "Name": "{% $states.input.Name %}" 57 | } 58 | }, 59 | { 60 | "Next": "NotReadyNoWait", 61 | "Condition": "{% $states.input.State != 'READY' and $behavior = 'NOWAIT' %}" 62 | } 63 | ], 64 | "QueryLanguage": "JSONata", 65 | "Output": { 66 | "Name": "{% $states.input.Name %}" 67 | } 68 | }, 69 | "WaitForCrawler": { 70 | "Type": "Wait", 71 | "Seconds": 30, 72 | "Next": "GetCrawler", 73 | "QueryLanguage": "JSONata", 74 | "Output": "{% $states.input.Name %}" 75 | }, 76 | "StartCrawler": { 77 | "Type": "Task", 78 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 79 | "End": true, 80 | "Retry": [ 81 | { 82 | "ErrorEquals": [ 83 | "States.ALL" 84 | ], 85 | "BackoffRate": 2, 86 | "IntervalSeconds": 1, 87 | "MaxAttempts": 8, 88 | "JitterStrategy": "FULL" 89 | } 90 | ], 91 | "QueryLanguage": "JSONata", 92 | "Arguments": { 93 | "Name": "{% $states.input.Name %}" 94 | } 95 | }, 96 | "NotReadyNoWait": { 97 | "Type": "Succeed", 98 | "QueryLanguage": "JSONata" 99 | } 100 | } 101 | }, 102 | "End": true, 103 | "QueryLanguage": "JSONata", 104 | "Items": "{% $states.input.crawlers %}" 105 | } 106 | }, 107 | "TimeoutSeconds": 1200 108 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/health-detail-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Collects Health Events", 3 | "StartAt": "SetGlobalVariables1", 4 | "States": { 5 | "SetGlobalVariables1": { 6 | "Type": "Pass", 7 | "QueryLanguage": "JSONata", 8 | "Assign": { 9 | "MODULE": "${Module}", 10 | "PARAMS": "", 11 | "DATA_COLLECTION_REGION": "${DeployRegion}", 12 | "DATA_COLLECTION_ACCOUNT": "${Account}", 13 | "PREFIX": "${Prefix}", 14 | "BUCKET": "${Bucket}", 15 | "CRAWLERS": ${Crawlers}, 16 | "EXE_UUID": "{% $states.input.main_exe_uuid %}", 17 | "CHILD_UUID": "{% $split($states.context.Execution.Id, ':')[7] %}", 18 | "EXE_START_TIME_SPLIT": "{% $split($states.context.Execution.StartTime, /[-:T.]/) %}", 19 | "MAP_KEY": "{% $states.input.file %}", 20 | "ACCOUNT": "{% $states.input.account %}", 21 | "INGEST_TIME": "{% $states.input.ingestion_time %}", 22 | "STACK_VERSION": "{% $states.input.stack_version %}" 23 | }, 24 | "Next": "SetGlobalVariables2" 25 | }, 26 | "SetGlobalVariables2": { 27 | "Type": "Pass", 28 | "QueryLanguage": "JSONata", 29 | "Assign": { 30 | "LOG_KEY_BASE": "{% 'logs/modules/'&$EXE_START_TIME_SPLIT[0]&'/'&$EXE_START_TIME_SPLIT[1]&'/'&$EXE_START_TIME_SPLIT[2]&'/'&$MODULE&'detail-sf-' %}", 31 | "CRAWLER_STATE_MACHINE": "{% 'arn:aws:states:'&$DATA_COLLECTION_REGION&':'&$DATA_COLLECTION_ACCOUNT&':stateMachine:'&$PREFIX&'CrawlerExecution-StateMachine' %}", 32 | "SUB_UUID": { 33 | "child-state-machine-exid": "{% $CHILD_UUID %}", 34 | "statemachine-id": "{% $states.context.StateMachine.Id %}" 35 | } 36 | }, 37 | "Next": "EntryLog" 38 | }, 39 | "EntryLog": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 42 | "QueryLanguage": "JSONata", 43 | "Arguments": { 44 | "Bucket": "{% $BUCKET %}", 45 | "Key": "{% $LOG_KEY_BASE&'entry-'&$EXE_UUID&'.json' %}", 46 | "Body": { 47 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 48 | "DataCollectionRegion": "{% 
$DATA_COLLECTION_REGION %}", 49 | "DataCollectionAccountId": "{% $DATA_COLLECTION_ACCOUNT %}", 50 | "Module": "{% $MODULE %}", 51 | "ModuleFunction": "child-sf-entry", 52 | "Params": "{% $PARAMS %}", 53 | "PayerId": "", 54 | "AccountId": "", 55 | "Region": "", 56 | "StatusCode": 200, 57 | "SubCode": "", 58 | "RecordCount": 0, 59 | "Description": "Child Step Function entered", 60 | "DataLocation": "", 61 | "MainExeUuid": "{% $EXE_UUID %}", 62 | "SubUuid": "{% $SUB_UUID %}", 63 | "Service": "StepFunction", 64 | "StackVersion": "{% $STACK_VERSION %}", 65 | "SubVersion": "" 66 | } 67 | }, 68 | "Retry": [ 69 | { 70 | "ErrorEquals": [ 71 | "States.ALL" 72 | ], 73 | "BackoffRate": 2, 74 | "IntervalSeconds": 1, 75 | "MaxAttempts": 3 76 | } 77 | ], 78 | "Next": "DataCollectionMap" 79 | }, 80 | "DataCollectionMap": { 81 | "Type": "Map", 82 | "QueryLanguage": "JSONata", 83 | "ItemReader": { 84 | "Resource": "arn:aws:states:::s3:getObject", 85 | "ReaderConfig": { 86 | "InputType": "CSV", 87 | "CSVHeaderLocation": "FIRST_ROW" 88 | }, 89 | "Arguments": { 90 | "Bucket": "{% $BUCKET %}", 91 | "Key": "{% $MAP_KEY %}" 92 | } 93 | }, 94 | "MaxConcurrency": 1, 95 | "ItemBatcher": { 96 | "MaxItemsPerBatch": 500, 97 | "BatchInput": { 98 | "account": "{% $ACCOUNT %}", 99 | "ingestion_time": "{% $INGEST_TIME %}", 100 | "main_exe_uuid": "{% $EXE_UUID %}", 101 | "sub_uuid": "{% $SUB_UUID %}", 102 | "bucket": "{% $BUCKET %}", 103 | "dc_account": "{% $DATA_COLLECTION_ACCOUNT %}", 104 | "dc_region": "{% $DATA_COLLECTION_REGION %}", 105 | "module": "{% $MODULE %}", 106 | "prefix": "{% $PREFIX %}", 107 | "log_key_base": "{% $LOG_KEY_BASE %}", 108 | "params": "{% $PARAMS %}", 109 | "stack_version": "{% $STACK_VERSION %}" 110 | } 111 | }, 112 | "ItemProcessor": { 113 | "ProcessorConfig": { 114 | "Mode": "DISTRIBUTED", 115 | "ExecutionType": "STANDARD" 116 | }, 117 | "StartAt": "DataCollectionLambda", 118 | "States": { 119 | "DataCollectionLambda": { 120 | "Type": "Task", 121 | "QueryLanguage": "JSONata", 122 | "Resource": "arn:aws:states:::lambda:invoke", 123 | "Arguments": { 124 | "FunctionName": "{% 'arn:aws:lambda:'&$states.input.BatchInput.dc_region&':'&$states.input.BatchInput.dc_account&':function:'&$states.input.BatchInput.prefix&$states.input.BatchInput.module&'-Lambda' %}", 125 | "Payload": { 126 | "account": "{% $states.input.BatchInput.account %}", 127 | "main_exe_uuid": "{% $states.input.BatchInput.main_exe_uuid %}", 128 | "sub_uuid": "{% $states.input.BatchInput.sub_uuid %}", 129 | "params": "{% $states.input.BatchInput.params %}", 130 | "ingestion_time": "{% $states.input.BatchInput.ingestion_time %}", 131 | "stack_version": "{% $states.input.BatchInput.stack_version %}", 132 | "items": "{% $states.input.Items %}" 133 | } 134 | }, 135 | "Catch": [ 136 | { 137 | "ErrorEquals": [ 138 | "States.ALL" 139 | ], 140 | "Output": { 141 | "account": "{% $states.input.BatchInput.account %}", 142 | "main_exe_uuid": "{% $states.input.BatchInput.main_exe_uuid %}", 143 | "sub_uuid": "{% $merge([$states.input.BatchInput.sub_uuid, {'map-state-machine-exid': $split($states.context.Execution.Id, ':')[7]}]) %}", 144 | "module": "{% $states.input.BatchInput.module %}", 145 | "bucket": "{% $states.input.BatchInput.bucket %}", 146 | "dc_account": "{% $states.input.BatchInput.dc_account %}", 147 | "dc_region": "{% $states.input.BatchInput.dc_region %}", 148 | "log_key_base": "{% $states.input.BatchInput.log_key_base %}", 149 | "params": "{% $states.input.BatchInput.params %}", 150 | "stack_version": "{% 
$states.input.BatchInput.stack_version %}", 151 | "description": "{% $states.errorOutput %}" 152 | }, 153 | "Next": "DCLambdaErrorMetric" 154 | } 155 | ], 156 | "Retry": [ 157 | { 158 | "ErrorEquals": [ 159 | "Lambda.TooManyRequestsException" 160 | ], 161 | "IntervalSeconds": 2, 162 | "MaxAttempts": 6, 163 | "BackoffRate": 2, 164 | "JitterStrategy": "FULL" 165 | } 166 | ], 167 | "End": true 168 | }, 169 | "DCLambdaErrorMetric": { 170 | "Type": "Task", 171 | "QueryLanguage": "JSONata", 172 | "Resource": "arn:aws:states:::aws-sdk:cloudwatch:putMetricData", 173 | "Arguments": { 174 | "Namespace": "CID-DataCollection", 175 | "MetricData": [ 176 | { 177 | "MetricName": "Error", 178 | "Value": 1, 179 | "Unit": "Count", 180 | "Dimensions": [ 181 | { 182 | "Name": "Module", 183 | "Value": "{% $states.input.module %}" 184 | } 185 | ] 186 | } 187 | ] 188 | }, 189 | "Output": { 190 | "account": "{% $states.input.account %}", 191 | "main_exe_uuid": "{% $states.input.main_exe_uuid %}", 192 | "sub_uuid": "{% $states.input.sub_uuid %}", 193 | "description": "{% $states.input.description %}", 194 | "module": "{% $states.input.module %}", 195 | "bucket": "{% $states.input.bucket %}", 196 | "dc_account": "{% $states.input.dc_account %}", 197 | "dc_region": "{% $states.input.dc_region %}", 198 | "log_key_base": "{% $states.input.log_key_base %}", 199 | "params": "{% $states.input.params %}", 200 | "stack_version": "{% $states.input.stack_version %}" 201 | }, 202 | "Next": "DCLambdaErrorLog" 203 | }, 204 | "DCLambdaErrorLog": { 205 | "Type": "Task", 206 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 207 | "QueryLanguage": "JSONata", 208 | "Arguments": { 209 | "Bucket": "{% $states.input.bucket %}", 210 | "Key": "{% $states.input.log_key_base&'-'&$random()&'.json' %}", 211 | "Body": { 212 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 213 | "DataCollectionRegion": "{% $states.input.dc_region %}", 214 | "DataCollectionAccountId": "{% $states.input.dc_account %}", 215 | "Module": "{% $states.input.module %}", 216 | "ModuleFunction": "sf-dc-lambda-error-log", 217 | "Params": "{% $states.input.params %}", 218 | "PayerId": "{% $states.input.account.payer_id %}", 219 | "AccountId": "", 220 | "Region": "", 221 | "StatusCode": 500, 222 | "SubCode": "", 223 | "RecordCount": 0, 224 | "Description": "{% $states.input.description %}", 225 | "DataLocation": "", 226 | "MainExeUuid": "{% $states.input.main_exe_uuid %}", 227 | "SubUuid": "{% $states.input.sub_uuid %}", 228 | "Service": "StepFunction", 229 | "StackVersion": "{% $states.input.stack_version %}", 230 | "SubVersion": "" 231 | } 232 | }, 233 | "Output": { 234 | "description": "{% $states.input.description %}" 235 | }, 236 | "Retry": [ 237 | { 238 | "ErrorEquals": [ 239 | "States.ALL" 240 | ], 241 | "BackoffRate": 2, 242 | "IntervalSeconds": 1, 243 | "MaxAttempts": 3 244 | } 245 | ], 246 | "Next": "FailMap" 247 | }, 248 | "FailMap": { 249 | "Type": "Fail", 250 | "QueryLanguage": "JSONata", 251 | "Error": "MapLambdaExecutionError", 252 | "Cause": "Error in Detail Lambda trapped. See logs in your Data Collection bucket." 
253 | } 254 | } 255 | }, 256 | "Output": { 257 | "status_code": 200, 258 | "description": "Health Events detail Map task completed successfully" 259 | }, 260 | "Catch": [ 261 | { 262 | "ErrorEquals": [ 263 | "States.ALL" 264 | ], 265 | "Output": { 266 | "status_code": 500, 267 | "description": "{% $states.errorOutput %}" 268 | }, 269 | "Next": "MapErrorMetric" 270 | } 271 | ], 272 | "Next": "CrawlerStepFunction" 273 | }, 274 | "MapErrorMetric": { 275 | "Type": "Task", 276 | "QueryLanguage": "JSONata", 277 | "Resource": "arn:aws:states:::aws-sdk:cloudwatch:putMetricData", 278 | "Arguments": { 279 | "Namespace": "CID-DataCollection", 280 | "MetricData": [ 281 | { 282 | "MetricName": "Error", 283 | "Value": 1, 284 | "Unit": "Count", 285 | "Dimensions": [ 286 | { 287 | "Name": "Module", 288 | "Value": "{% $MODULE %}" 289 | } 290 | ] 291 | } 292 | ] 293 | }, 294 | "Assign": { 295 | "ExecutionStatus": 500, 296 | "Description": "Child Step Function AccountMap failed most or all executions" 297 | }, 298 | "Next": "ExitLog" 299 | }, 300 | "CrawlerStepFunction": { 301 | "Type": "Task", 302 | "QueryLanguage": "JSONata", 303 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 304 | "Arguments": { 305 | "StateMachineArn": "{% $CRAWLER_STATE_MACHINE %}", 306 | "Input": { 307 | "behavior": "WAIT", 308 | "crawlers": "{% $CRAWLERS %}" 309 | } 310 | }, 311 | "Assign": { 312 | "ExecutionStatus": 200, 313 | "Description": "Child Step Function execution completed successfully" 314 | }, 315 | "Catch": [ 316 | { 317 | "ErrorEquals": [ 318 | "States.ALL" 319 | ], 320 | "Assign": { 321 | "ExecutionStatus": 500, 322 | "Description": "{% 'Child Step Function AccountMap failed with error: '&$states.errorOutput %}" 323 | }, 324 | "Next": "ExitLog" 325 | } 326 | ], 327 | "Next": "ExitLog" 328 | }, 329 | "ExitLog": { 330 | "Type": "Task", 331 | "Resource": "arn:aws:states:::aws-sdk:s3:putObject", 332 | "QueryLanguage": "JSONata", 333 | "Arguments": { 334 | "Bucket": "{% $BUCKET %}", 335 | "Key": "{% $LOG_KEY_BASE&'exit-'&$EXE_UUID&'.json' %}", 336 | "Body": { 337 | "Timestamp": "{% $replace($now(), 'Z', '') %}", 338 | "DataCollectionRegion": "{% $DATA_COLLECTION_REGION %}", 339 | "DataCollectionAccountId": "{% $DATA_COLLECTION_ACCOUNT %}", 340 | "Module": "{% $MODULE %}", 341 | "ModuleFunction": "child-sf-exit", 342 | "Params": "{% $PARAMS %}", 343 | "PayerId": "{% $ACCOUNT.payer_id %}", 344 | "AccountId": "", 345 | "Region": "", 346 | "StatusCode": "{% $ExecutionStatus %}", 347 | "SubCode": "", 348 | "RecordCount": 0, 349 | "Description": "{% $Description %}", 350 | "DataLocation": "", 351 | "MainExeUuid": "{% $EXE_UUID %}", 352 | "SubUuid": "{% $SUB_UUID %}", 353 | "Service": "StepFunction", 354 | "StackVersion": "{% $STACK_VERSION %}", 355 | "SubVersion": "" 356 | } 357 | }, 358 | "Retry": [ 359 | { 360 | "ErrorEquals": [ 361 | "States.ALL" 362 | ], 363 | "BackoffRate": 2, 364 | "IntervalSeconds": 1, 365 | "MaxAttempts": 3 366 | } 367 | ], 368 | "Next": "IsError" 369 | }, 370 | "IsError": { 371 | "Type": "Choice", 372 | "Choices": [ 373 | { 374 | "Condition": "{% $ExecutionStatus >= 500 %}", 375 | "Next": "Fail" 376 | } 377 | ], 378 | "QueryLanguage": "JSONata", 379 | "Default": "Success" 380 | }, 381 | "Success": { 382 | "Type": "Succeed" 383 | }, 384 | "Fail": { 385 | "Type": "Fail" 386 | } 387 | }, 388 | "TimeoutSeconds": 10800 389 | } -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/main-state-machine-v2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the collection of ${Module} data", 3 | "StartAt": "AccountCollectorInvoke", 4 | "States": { 5 | "AccountCollectorInvoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "Parameters": { 9 | "Payload": { 10 | "Type": "${CollectionType}" 11 | }, 12 | "FunctionName": "${AccountCollectorLambdaARN}" 13 | }, 14 | "Retry": [ 15 | { 16 | "ErrorEquals": [ 17 | "Lambda.ServiceException", 18 | "Lambda.AWSLambdaException", 19 | "Lambda.SdkClientException", 20 | "Lambda.TooManyRequestsException" 21 | ], 22 | "IntervalSeconds": 2, 23 | "MaxAttempts": 6, 24 | "BackoffRate": 2 25 | } 26 | ], 27 | "Next": "AccountMap", 28 | "ResultPath": "$.accountLambdaOutput" 29 | }, 30 | "AccountMap": { 31 | "Type": "Map", 32 | "ItemProcessor": { 33 | "ProcessorConfig": { 34 | "Mode": "DISTRIBUTED", 35 | "ExecutionType": "STANDARD" 36 | }, 37 | "StartAt": "InvokeModuleLambda", 38 | "States": { 39 | "InvokeModuleLambda": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:${DeployRegion}:${Account}:lambda:invoke", 42 | "OutputPath": "$.Payload", 43 | "Parameters": { 44 | "Payload": { 45 | "account.$": "$.account", 46 | "params": "${Params}" 47 | }, 48 | "FunctionName": "${ModuleLambdaARN}" 49 | }, 50 | "Retry": [ 51 | { 52 | "ErrorEquals": [ 53 | "Lambda.ServiceException", 54 | "Lambda.AWSLambdaException", 55 | "Lambda.SdkClientException", 56 | "Lambda.TooManyRequestsException" 57 | ], 58 | "IntervalSeconds": 2, 59 | "MaxAttempts": 6, 60 | "BackoffRate": 2 61 | } 62 | ], 63 | "End": true 64 | } 65 | } 66 | }, 67 | "MaxConcurrency": 60, 68 | "ItemReader": { 69 | "Resource": "arn:aws:states:::s3:getObject", 70 | "ReaderConfig": { 71 | "InputType": "JSON" 72 | }, 73 | "Parameters": { 74 | "Bucket.$": "$.accountLambdaOutput.Payload.bucket", 75 | "Key.$": "$.accountLambdaOutput.Payload.accountList" 76 | } 77 | }, 78 | "Next": "CrawlerStepFunctionStartExecution" 79 | }, 80 | "CrawlerStepFunctionStartExecution": { 81 | "Type": "Task", 82 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 83 | "Parameters": { 84 | "StateMachineArn": "arn:aws:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine", 85 | "Input": { 86 | "crawlers": ${Crawlers} 87 | } 88 | }, 89 | "End": true 90 | } 91 | }, 92 | "TimeoutSeconds": 10800 93 | } 94 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/main-state-machine-v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Orchestrate the collection of ${Module} data", 3 | "StartAt": "AccountCollectorInvoke", 4 | "States": { 5 | "AccountCollectorInvoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "Parameters": { 9 | "Payload": { 10 | "Type": "${CollectionType}" 11 | }, 12 | "FunctionName": "${AccountCollectorLambdaARN}" 13 | }, 14 | "Retry": [ 15 | { 16 | "ErrorEquals": [ 17 | "Lambda.ServiceException", 18 | "Lambda.AWSLambdaException", 19 | "Lambda.SdkClientException", 20 | "Lambda.TooManyRequestsException" 21 | ], 22 | "IntervalSeconds": 2, 23 | "MaxAttempts": 6, 24 | "BackoffRate": 2 25 | } 26 | ], 27 | "Next": "AccountMap", 28 | "ResultPath": "$.accountLambdaOutput" 29 | }, 30 | "AccountMap": { 31 | "Type": "Map", 32 | "ItemProcessor": { 33 | "ProcessorConfig": { 34 | "Mode": "DISTRIBUTED", 35 | "ExecutionType": "STANDARD" 36 | }, 37 | "StartAt": 
"InvokeModuleLambda", 38 | "States": { 39 | "InvokeModuleLambda": { 40 | "Type": "Task", 41 | "Resource": "arn:aws:states:${DeployRegion}:${Account}:lambda:invoke", 42 | "OutputPath": "$.Payload", 43 | "Parameters": { 44 | "Payload": { 45 | "account.$": "$.account", 46 | "params": "${Params}" 47 | }, 48 | "FunctionName": "${ModuleLambdaARN}" 49 | }, 50 | "Retry": [ 51 | { 52 | "ErrorEquals": [ 53 | "Lambda.ServiceException", 54 | "Lambda.AWSLambdaException", 55 | "Lambda.SdkClientException", 56 | "Lambda.TooManyRequestsException" 57 | ], 58 | "IntervalSeconds": 2, 59 | "MaxAttempts": 6, 60 | "BackoffRate": 2 61 | } 62 | ], 63 | "End": true 64 | } 65 | } 66 | }, 67 | "MaxConcurrency": 60, 68 | "ItemReader": { 69 | "Resource": "arn:aws:states:::s3:getObject", 70 | "ReaderConfig": { 71 | "InputType": "JSON" 72 | }, 73 | "Parameters": { 74 | "Bucket.$": "$.accountLambdaOutput.Payload.bucket", 75 | "Key.$": "$.accountLambdaOutput.Payload.accountList" 76 | } 77 | }, 78 | "Next": "CrawlerStepFunctionStartExecution" 79 | }, 80 | "CrawlerStepFunctionStartExecution": { 81 | "Type": "Task", 82 | "Resource": "arn:aws:states:::states:startExecution.sync:2", 83 | "Parameters": { 84 | "StateMachineArn": "arn:aws:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine", 85 | "Input": { 86 | "crawlers": ${Crawlers} 87 | } 88 | }, 89 | "End": true 90 | } 91 | }, 92 | "TimeoutSeconds": 10800 93 | } 94 | -------------------------------------------------------------------------------- /data-collection/deploy/source/step-functions/standalone-state-machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "Execute Lambda and Crawler in standalone mode", 3 | "StartAt": "Lambda Invoke", 4 | "States": { 5 | "Lambda Invoke": { 6 | "Type": "Task", 7 | "Resource": "arn:aws:states:::lambda:invoke", 8 | "OutputPath": "$.Payload", 9 | "Parameters": { 10 | "FunctionName": "${ModuleLambdaARN}" 11 | }, 12 | "Retry": [ 13 | { 14 | "ErrorEquals": [ 15 | "Lambda.ServiceException", 16 | "Lambda.AWSLambdaException", 17 | "Lambda.SdkClientException", 18 | "Lambda.TooManyRequestsException" 19 | ], 20 | "IntervalSeconds": 1, 21 | "MaxAttempts": 3, 22 | "BackoffRate": 2 23 | } 24 | ], 25 | "Next": "GetCrawler1" 26 | }, 27 | "GetCrawler1": { 28 | "Type": "Task", 29 | "Parameters": { 30 | "Name": "${Crawler}" 31 | }, 32 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 33 | "Next": "Choice1", 34 | "OutputPath": "$.Crawler", 35 | "Retry": [ 36 | { 37 | "ErrorEquals": [ 38 | "Glue.ThrottlingException" 39 | ], 40 | "BackoffRate": 2, 41 | "IntervalSeconds": 5, 42 | "MaxAttempts": 5, 43 | "JitterStrategy": "FULL" 44 | } 45 | ] 46 | }, 47 | "Choice1": { 48 | "Type": "Choice", 49 | "Choices": [ 50 | { 51 | "Not": { 52 | "Variable": "$.State", 53 | "StringEquals": "READY" 54 | }, 55 | "Next": "Wait for Crawler to be ready" 56 | } 57 | ], 58 | "Default": "StartCrawler" 59 | }, 60 | "Wait for Crawler to be ready": { 61 | "Type": "Wait", 62 | "Seconds": 60, 63 | "Next": "GetCrawler1" 64 | }, 65 | "StartCrawler": { 66 | "Type": "Task", 67 | "Parameters": { 68 | "Name": "${Crawler}" 69 | }, 70 | "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler", 71 | "Next": "Wait for Crawler Execution", 72 | "Retry": [ 73 | { 74 | "ErrorEquals": [ 75 | "Glue.ThrottlingException" 76 | ], 77 | "BackoffRate": 2, 78 | "IntervalSeconds": 5, 79 | "MaxAttempts": 5, 80 | "JitterStrategy": "FULL" 81 | } 82 | ] 83 | }, 84 | "Wait for Crawler Execution": { 85 | "Type": 
"Wait", 86 | "Seconds": 60, 87 | "Next": "GetCrawler2" 88 | }, 89 | "GetCrawler2": { 90 | "Type": "Task", 91 | "Parameters": { 92 | "Name": "${Crawler}" 93 | }, 94 | "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler", 95 | "Next": "Choice2", 96 | "OutputPath": "$.Crawler", 97 | "Retry": [ 98 | { 99 | "ErrorEquals": [ 100 | "Glue.ThrottlingException" 101 | ], 102 | "BackoffRate": 2, 103 | "IntervalSeconds": 5, 104 | "MaxAttempts": 5, 105 | "JitterStrategy": "FULL" 106 | } 107 | ] 108 | }, 109 | "Choice2": { 110 | "Type": "Choice", 111 | "Choices": [ 112 | { 113 | "Not": { 114 | "Variable": "$.State", 115 | "StringEquals": "READY" 116 | }, 117 | "Next": "Wait for Crawler Execution" 118 | } 119 | ], 120 | "Default": "Completed" 121 | }, 122 | "Completed": { 123 | "Type": "Pass", 124 | "End": true 125 | } 126 | } 127 | } -------------------------------------------------------------------------------- /data-collection/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import git 4 | import json 5 | 6 | repo = git.Repo('.') 7 | 8 | print(repo.git.execute('git checkout main'.split())) 9 | print(repo.git.execute('git pull'.split())) 10 | 11 | 12 | old_ver = json.load(open("data-collection/utils/version.json"))['version'] 13 | 14 | print (old_ver) 15 | bump='patch' 16 | if len(sys.argv)>1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump=='patch': 22 | new_ver = '.'.join(map(str,[maj, minor, patch + 1])) 23 | elif bump=='minor': 24 | new_ver = '.'.join(map(str,[maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = open("data-collection/utils/version.json").read() 32 | with open("data-collection/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver,new_ver)) 34 | 35 | 36 | filenames = [ 37 | 'data-collection/deploy/deploy-data-read-permissions.yaml', 38 | 'data-collection/deploy/deploy-data-collection.yaml', 39 | 'data-collection/deploy/deploy-in-management-account.yaml', 40 | 'data-collection/deploy/deploy-in-linked-account.yaml', 41 | 'data-collection/deploy/source/step-functions/main-state-machine.json', 42 | "data-collection/utils/version.json", 43 | ] 44 | for filename in filenames: 45 | tx = open(filename).read() 46 | with open(filename, "w") as f: 47 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 48 | 49 | 50 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 51 | 52 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 53 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 54 | -------------------------------------------------------------------------------- /data-collection/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export AWS_REGION=us-east-1 6 | export STACK_SET_NAME=LayerBuckets 7 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 8 | 9 | code_path=$(git rev-parse --show-toplevel)/data-collection/deploy 10 | version=v$(jq -r '.version' data-collection/utils/version.json) 11 | 12 | echo "sync to central bucket" 13 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/data-collection/ 14 | aws s3 sync $code_path/ 
s3://$CENTRAL_BUCKET/cfn/data-collection/$version/ 15 | 16 | echo "sync to regional bucket with version prefix" 17 | aws cloudformation list-stack-instances \ 18 | --stack-set-name $STACK_SET_NAME \ 19 | --query 'Summaries[].[StackId,Region]' \ 20 | --output text | 21 | while read stack_id region; do 22 | echo "sync to $region" 23 | bucket=$(aws cloudformation list-stack-resources --stack-name $stack_id \ 24 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 25 | --region $region --output text) 26 | 27 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/ 28 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/$version/ --delete 29 | done 30 | 31 | echo 'Done' 32 | -------------------------------------------------------------------------------- /data-collection/utils/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script uploads CloudFormation files to S3 bucket. Can be used with any testing bucket or prod. 4 | # see also README.md 5 | 6 | if [ -n "$1" ]; then 7 | bucket=$1 8 | else 9 | echo "ERROR: First parameter not supplied. Provide a bucket name." 10 | exit 1 11 | fi 12 | code_path=$(git rev-parse --show-toplevel)/data-collection/deploy 13 | version=$(jq -r '.version' data-collection/utils/version.json) 14 | 15 | echo "Sync to $bucket" 16 | aws s3 sync $code_path/ s3://$bucket/cfn/data-collection/v$version/ --delete 17 | echo 'Done' 18 | -------------------------------------------------------------------------------- /data-collection/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "3.11.0" 3 | } -------------------------------------------------------------------------------- /data-exports/README.md: -------------------------------------------------------------------------------- 1 | # Data Exports and Legacy CUR 2 | 3 | ## Table of Contents 4 | - [Introduction](#introduction) 5 | - [Data Exports](#data-exports) 6 | - [Basic Architecture](#basic-architecture-of-data-exports) 7 | - [Advanced Architecture](#advanced-architecture-of-data-exports) 8 | - [Legacy Cost and Usage Report](#legacy-cost-and-usage-report) 9 | - [FAQ](#faq) 10 | 11 | ## Introduction 12 | This readme contains description of solutions for AWS Data Exports and Legacy CUR replication and consolidation across multiple accounts. This is a part of Cloud Intelligence Dashboards and it is recommended by [AWS Data Exports official documentation](https://docs.aws.amazon.com/cur/latest/userguide/dataexports-processing.html). 13 | 14 | ## Data Exports 15 | 16 | For deployment instructions, please refer to the documentation at: https://catalog.workshops.aws/awscid/data-exports. 17 | 18 | Check code here: [data-exports-aggregation.yaml](deploy/data-exports-aggregation.yaml) 19 | 20 | 21 | ### Basic Architecture of Data Exports 22 | ![Basic Architecture of Data Exports](/.images/architecture-data-exports.png "Basic Architecture of Data Exports") 23 | 24 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) delivers daily Cost & Usage Report (CUR2) and other reports to an [Amazon S3 Bucket](https://aws.amazon.com/s3/) in the Management Account. 25 | 2. [Amazon S3](https://aws.amazon.com/s3/) replication rule copies Export data to a dedicated Data Collection Account S3 bucket automatically. 26 | 3. 
[Amazon Athena](https://aws.amazon.com/athena/) allows querying data directly from the S3 bucket using an [AWS Glue](https://aws.amazon.com/glue/) table schema definition. 27 | 4. [Amazon QuickSight](https://aws.amazon.com/quicksight/) datasets can read from [Amazon Athena](https://aws.amazon.com/athena/). Check Cloud Intelligence Dashboards for more details. 28 | 29 | ### Advanced Architecture of Data Exports 30 | For customers with additional requirements, an enhanced architecture is available: 31 | 32 | ![Advanced Architecture of Data Exports](/.images/architecture-data-exports-advanced.png "Advanced Architecture of Data Exports") 33 | 34 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) service delivers updated monthly [Cost & Usage Report (CUR2)](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html) up to three times a day to an [Amazon S3](https://aws.amazon.com/s3/) Bucket in your AWS Account (either in Management/Payer Account or a regular Linked Account). In us-east-1 region, the CloudFormation creates native resources; in other regions, CloudFormation uses AWS Lambda and Custom Resource to provision Data Exports in us-east-1. 35 | 36 | 2. [Amazon S3 replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html) rules copy Export data to a dedicated Data Collection Account automatically. This replication filters out all metadata and makes the file structure on the S3 bucket compatible with [Amazon Athena](https://aws.amazon.com/athena/) and [AWS Glue](https://aws.amazon.com/glue/) requirements. 37 | 38 | 3. A [Bucket Policy](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-policies.html) controls which accounts can replicate data to the destination bucket. 39 | 40 | 4. [AWS Glue Crawler](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#crawling-component) runs every midnight UTC to update the partitions of the table definition in [AWS Glue Data Catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#data-catalog-component). 41 | 42 | 5. [Amazon QuickSight](https://aws.amazon.com/quicksight/) pulls data from Amazon Athena to its SPICE (Super-fast, Parallel, In-memory Calculation Engine). 43 | 44 | 6. Updated QuickSight dashboards are available for the users. 45 | 46 | 7. When collecting data exports for Linked accounts (not for Management Accounts), you may also want to collect data exports for the Data Collection account itself. In this case, specify the Data Collection account as the first in the list of Source Accounts. Replication is still required to remove metadata. 47 | 48 | 8. Athena's reading process can be affected by writing operations. When replication arrives, it might fail to update datasets, especially with high volumes of data. In such cases, consider scheduling temporary disabling and re-enabling of the Amazon S3 bucket policy that allows replication. Since exports typically arrive up to three times a day, this temporary deactivation has minimal side effects and the updated data will be available with the next data delivery. 49 | 50 | 9. (Optional) Secondary bucket replication enables customers to archive data exports, consolidating data exports from multiple AWS Organisations or deploying staging environments (as described below ). 51 | 52 | ### Using Secondary Replication Bucket 53 | There can be various situations where customers need to replicate data exports to multiple destinations. 
One common scenario is a large enterprise with multiple business units, each with one or more AWS organisations. For this large enterprise, the Headquarters requires a consolidated view across all Business Units while individual Business Units still need visibility into their own data. 54 | 55 | To accomplish this, both the Headquarters and each Business Unit can implement separate data export destination stacks. Business Unit administrators, working from their management account, can specify a target bucket located within the Headquarters stack, enabling seamless data replication to both S3 buckets. 56 | 57 | Another scenario is replicating data to a staging environment for testing purposes. 58 | 59 | ![Secondary Replication Bucket](/.images/architecture-data-export-replication-to-secondary.png) 60 | 61 | 1. [AWS Data Exports](https://aws.amazon.com/aws-cost-management/aws-data-exports/) service delivers updated monthly [Cost & Usage Report (CUR2)](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html) up to three times a day to an [Amazon S3](https://aws.amazon.com/s3/) Bucket in the Business Unit AWS Account (either in Management/Payer Account or a regular Linked Account). In us-east-1 region, the CloudFormation creates native resources; in other regions, CloudFormation uses AWS Lambda and Custom Resource to provision Data Exports in us-east-1. 62 | 63 | 2. [Amazon S3 replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html) rules copy Export data to a dedicated Data Collection Account automatically. This replication filters out all metadata and makes the file structure on the S3 bucket compatible with [Amazon Athena](https://aws.amazon.com/athena/) and [AWS Glue](https://aws.amazon.com/glue/) requirements. 64 | 65 | 3. Using a Secondary Replication rule, the Export data is replicated from the Business Unit to the S3 bucket in the Headquarters AWS account. Each Business Unit should create its own Secondary Replication rule pointing to that bucket, which gives the Headquarters consolidated data from all Business Units. 66 | 67 | ## Legacy Cost and Usage Report 68 | Legacy AWS Cost and Usage Reports (Legacy CUR) can still be used for Cloud Intelligence Dashboards and other use cases. 69 | 70 | The CID project provides a CloudFormation template for Legacy CUR. Unlike the Data Exports CloudFormation template, it does not provide AWS Glue tables. You can use this template to replicate and aggregate CUR from multiple source accounts (Management or Linked). 71 | 72 | ![Basic Architecture of CUR](/.images/architecture-legacy-cur.png "Basic Architecture of CUR") 73 | 74 | 75 | Check code here: [cur-aggregation.yaml](deploy/cur-aggregation.yaml) 76 | 77 | ## FAQ 78 | 79 | ### Why replicate data instead of providing cross-account access? 80 | Cross-account access is possible but can be difficult to maintain, considering the many different roles that require this access, especially when dealing with multiple accounts. 81 | 82 | ### We only have one AWS Organization. Do we still need this? 83 | Yes. Throughout an organization's lifecycle, mergers and acquisitions may occur, so this approach prepares you for potential future scenarios. 84 | 85 | ### Can I use S3 Intelligent Tiering or S3 Infrequent Access (IA) for my CUR data connected to Athena? 
86 | We strongly recommend **against** using S3 IA for CUR data that is connected to Athena, especially if you have active FinOps users querying this data. Here's why: 87 | - CUDOS typically only retrieves data for the last 7 months, so theoretically older data could be moved to S3 IA or managed with Intelligent Tiering. 88 | - Moving older CUR parquet files to IA could potentially reduce storage costs by up to 45%. 89 | - **However**, this only saves money if the data isn't frequently accessed. With S3 IA, you're charged $0.01 per GB retrieved. 90 | - Athena uses multiple computational nodes in parallel, and complex queries can multiply data reads dramatically. For every 1GB of data you want to scan, Athena might perform up to 75GB of S3 reads. 91 | - If someone runs a query without properly limiting it to specific billing periods, the retrieval costs can be astronomical. For example: 92 | * Scanning a full CUR of 600GB: `600GB × 75 × $0.01/GB` = `$450.00` for just one query! 93 | - Due to this risk of human error, we do not use storage tiering as a default and strongly advise against it for CUR data connected to Athena. 94 | We also advise against Intelligent Tiering by default. 95 | - KPI Dashboard - one of our foundational dashboards - scans the entire CUR (Cost and Usage Report) data to detect the first snapshot and determine its age. This prevents AWS Intelligent Tiering from functioning effectively, as it forces all data to remain in frequent access tiers and results in unnecessary additional monitoring costs with no cost-saving benefit. 96 | -------------------------------------------------------------------------------- /data-exports/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release. 4 | 5 | CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 6 | files=("data-exports-aggregation" "cur-aggregation") 7 | for file in "${files[@]}"; do 8 | #Here data export stack and legacy cur aggregation have their own versions 9 | version=$(grep '^Description:' "data-exports/deploy/${file}.yaml" | grep -o '[0-9]\+\.[0-9]\+\.[0-9]\+') 10 | source_path="data-exports/deploy/${file}.yaml" 11 | echo $source_path 12 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/data-exports/$version/${file}.yaml" 13 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/data-exports/latest/${file}.yaml" 14 | aws s3 cp "$source_path" "s3://$CENTRAL_BUCKET/cfn/${file}.yaml" 15 | done 16 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | minversion = 6.0 3 | log_format = %(asctime)s [%(levelname)8s] %(message)s 4 | log_cli = true 5 | log_cli_level = INFO 6 | addopts = -s 7 | testpaths = 8 | test -------------------------------------------------------------------------------- /rls/.gitignore: -------------------------------------------------------------------------------- 1 | **/create_rls.zip 2 | **vars 3 | *tox.ini 4 | -------------------------------------------------------------------------------- /rls/README.md: -------------------------------------------------------------------------------- 1 | # RLS generator for QuickSight 2 | 3 | ## About QS RLS generator 4 | Generates an RLS CSV file for QuickSight based on AWS Organizational Units. 
5 | 6 | [About QuickSight RLS](https://docs.aws.amazon.com/quicksight/latest/user/restrict-access-to-a-data-set-using-row-level-security.html) 7 | [About AWS Organizational Units](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_introduction.html) 8 | 9 | 10 | ## Getting Started 11 | 12 | The code can be executed locally or as a Lambda function. [AWS Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) are managed in the standard way. 13 | To run the Lambda, define the following `ENV_VARS`; the DEFAULTS below are used when an ENV_VAR is not set. 14 | 15 | [Using AWS Lambda environment variables](https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html) 16 | 17 | 18 | List of Variables to preconfigure 19 | ``` 20 | OWNER_TAG = 'cid_users' 21 | BUCKET_NAME = 'NO DEFAULT' # Bucket where to upload the code 22 | QS_REGION = 'QS region' 23 | export MANAGEMENT_ACCOUNT_IDS='comma separated list of account_ids, format ACC_ID:REGION' 24 | export MANAGEMENTROLENAME=WA-Lambda-Assume-Role-Management-Account # Role to Assume in every payer/management account 25 | TMP_RLS_FILE = '/tmp/cid_rls.csv' 26 | ``` 27 | ## Defining TAGS 28 | 29 | 1) Tags at the root OU level give full access to all data and overwrite any other rules for the user at other levels. 30 | 2) Tags at the OU level are inherited by all children accounts. 31 | 3) Tags at the Account level generate rules for that account only. 32 | 33 | 34 | ## Output 35 | 36 | Output is written to the `TMP_RLS_FILE` location and uploaded to `BUCKET_NAME`. 37 | 38 | 39 | ## Example Output 40 | 41 | 42 | ``` 43 | UserName,account_id,payer_id 44 | vmindru@megacorp.corp,, 45 | vmindru_has_it_all,, 46 | Admin/vmindru-Isengard,, 47 | cross_ou_user,"0140000000,7200000,74700000,853000000", 48 | foo_inherit,74700000000, 49 | student1,"853000000,126000000", 50 | student2,"853678200000,126600000", 51 | other@company_foo.com,"363700000,1675000000", 52 | other@company.com,"36370000000,16750000000", 53 | vmindru@amazon.com,363000000000, 54 | ``` 55 | 56 | 57 | 58 | ## Create Lambda 59 | 60 | ### Create a new Lambda in the same region as your QS Dashboards 61 | 62 | 1) Create a new Lambda 63 | 2) Select Python 3.8 64 | 65 | ### Configure Lambda 66 | 67 | 1) Create and assign a new Execution Role: LambdaS3Org Role 68 | 2) Create and add the 2 Permission Policies below to the LambdaS3Org Role 69 | 70 | `LambdaOrgS3ListTags` 71 | 72 | ``` 73 | { 74 | "Version": "2012-10-17", 75 | "Statement": [ 76 | { 77 | "Sid": "VisualEditor0", 78 | "Effect": "Allow", 79 | "Action": [ 80 | "organizations:ListAccountsForParent", 81 | "organizations:ListAccounts", 82 | "organizations:ListTagsForResource", 83 | "organizations:ListOrganizationalUnitsForParent" 84 | ], 85 | "Resource": "*" 86 | } 87 | ] 88 | } 89 | ``` 90 | 91 | `AWSLambdaS3ExecutionRole` 92 | 93 | ``` 94 | { 95 | "Version": "2012-10-17", 96 | "Statement": [ 97 | { 98 | "Sid": "VisualEditor0", 99 | "Effect": "Allow", 100 | "Action": "s3:GetObject", 101 | "Resource": "arn:aws:s3:::*" 102 | }, 103 | { 104 | "Sid": "VisualEditor1", 105 | "Effect": "Allow", 106 | "Action": "s3:PutObject", 107 | "Resource": "arn:aws:s3:::vmindru-cid-fr/cid_rls.csv" 108 | } 109 | ] 110 | } 111 | ``` 112 | 113 | ### Add ENV Variables 114 | 115 | Go to function settings and add ENV VARS 116 | 117 | `BUCKET_NAME` - Bucket to which the RLS file is uploaded 118 | `ROOT_OU` - ID of your root OU 119 | 120 | ### Increase execution time to 120s 121 | 122 | 123 | 124 | 125 | --------------------------------------------------------------------------------
/rls/deploy/deploy_cid_rls.yaml: -------------------------------------------------------------------------------- 1 | #https://github.com/awslabs/cid-data-collection-framework/blob/main/rls/deploy/deploy_cid_rls.yaml 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | Description: Lambda to collect AWS Organization and Amazon QuickSight data and store in S3 for RLS implementation v0.2.0 - AWS Solution SO9011 4 | Parameters: 5 | DestinationBucket: 6 | Type: String 7 | Description: Name of the S3 Bucket that is created to hold org data 8 | AllowedPattern: (?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$) 9 | ManagementAccountRole: 10 | Type: String 11 | Description: The name of the IAM role that will be deployed in the management account which can retrieve AWS Organization data. KEEP THE SAME AS WHAT IS DEPLOYED INTO MANAGEMENT ACCOUNT 12 | Default: Lambda-Assume-Role-Management-Account 13 | ResourcePrefix: 14 | Type: String 15 | Description: This prefix will be placed in front of all roles created. Note you may wish to add a dash at the end to make more readable 16 | Default: CID-DC- 17 | ManagementAccountID: 18 | Type: String 19 | AllowedPattern: ([a-z0-9\-, ]*?$) 20 | Description: "(Ex: 123456789,098654321,789054312) List of Payer IDs you wish to collect data for. Can just be one Accounts" 21 | Schedule: 22 | Type: String 23 | Description: Cron job to trigger the lambda using cloudwatch event 24 | Default: "rate(1 hour)" 25 | CodeBucket: 26 | Type: String 27 | Description: S3 Bucket with RLS code,this coverts to CodeBucket-Region e.g. for us-east-1 this will be aws-managed-cost-intelligence-dashboards-us-east-1 28 | Default: aws-managed-cost-intelligence-dashboards 29 | CodeKey: 30 | Type: String 31 | Description: file name of ZipFile with data code 32 | Default: cfn/rls/create_rls.zip # RLS Folder to be updated, once the LAB will be created 33 | Outputs: 34 | LambdaFunctionName: 35 | Value: 36 | Ref: CIDRLS 37 | LambdaFunctionARN: 38 | Description: Lambda function ARN. 39 | Value: 40 | Fn::GetAtt: 41 | - CIDRLS 42 | - Arn 43 | Resources: 44 | CIDRLS: 45 | Type: AWS::Lambda::Function 46 | Properties: 47 | FunctionName: !Sub 48 | - 'CIDRLS_${Id}' 49 | - Id: !Select [0, !Split ['-', !Ref 'AWS::StackName']] 50 | Description: LambdaFunction of python3.8. 
51 | Runtime: python3.9 52 | Code: 53 | S3Bucket: !Sub '${CodeBucket}-${AWS::Region}' 54 | S3Key: !Ref CodeKey 55 | Handler: 'create_rls.lambda_handler' 56 | MemorySize: 2688 57 | Timeout: 600 58 | Role: !GetAtt LambdaRole.Arn 59 | Environment: 60 | Variables: 61 | BUCKET_NAME: !Ref DestinationBucket 62 | MANAGEMENTROLENAME: !Sub "${ResourcePrefix}${ManagementAccountRole}" 63 | MANAGEMENT_ACCOUNT_IDS: !Ref ManagementAccountID 64 | QS_REGION: !Ref AWS::Region 65 | LambdaRole: 66 | Type: AWS::IAM::Role 67 | Properties: 68 | RoleName: !Sub "${ResourcePrefix}RLS-LambdaRole" 69 | AssumeRolePolicyDocument: 70 | Statement: 71 | - Action: 72 | - sts:AssumeRole 73 | Effect: Allow 74 | Principal: 75 | Service: 76 | - lambda.amazonaws.com 77 | Version: 2012-10-17 78 | ManagedPolicyArns: 79 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 80 | Path: / 81 | Policies: 82 | - PolicyName: "Assume-Management-Organization-Data-Role" 83 | PolicyDocument: 84 | Version: "2012-10-17" 85 | Statement: 86 | - Effect: "Allow" 87 | Action: "sts:AssumeRole" 88 | Resource: !Sub "arn:${AWS::Partition}:iam::*:role/${ResourcePrefix}${ManagementAccountRole}" # Need to assume a Read role in management accounts 89 | - PolicyName: "Logs" 90 | PolicyDocument: 91 | Version: "2012-10-17" 92 | Statement: 93 | - Effect: "Allow" 94 | Action: 95 | - "logs:CreateLogGroup" 96 | - "logs:CreateLogStream" 97 | - "logs:PutLogEvents" 98 | - "logs:DescribeLogStreams" 99 | Resource: !Sub "arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/CID-RLS*" 100 | - Effect: "Allow" 101 | Action: 102 | - "s3:PutObject" 103 | - "s3:ListBucket" 104 | Resource: 105 | - !Sub "arn:${AWS::Partition}:s3:::${DestinationBucket}" 106 | - !Sub "arn:${AWS::Partition}:s3:::${DestinationBucket}/*" 107 | - Effect: "Allow" 108 | Action: 109 | - "quicksight:ListUsers" 110 | Resource: "*" # Cannot restrict this 111 | CloudWatchTrigger: 112 | Type: AWS::Events::Rule 113 | Properties: 114 | Description: Scheduler 115 | Name: !Sub 116 | - 'Scheduler_ForCIDRLS_${Id}' 117 | - Id: !Select [0, !Split ['-', !Ref 'AWS::StackName']] 118 | ScheduleExpression: !Ref Schedule 119 | State: ENABLED 120 | Targets: 121 | - Arn: !GetAtt CIDRLS.Arn 122 | Id: TriggerForCIDRLS 123 | EventPermission: 124 | Type: AWS::Lambda::Permission 125 | Properties: 126 | FunctionName: !GetAtt CIDRLS.Arn 127 | Action: lambda:InvokeFunction 128 | Principal: events.amazonaws.com 129 | SourceAccount: !Ref 'AWS::AccountId' 130 | SourceArn: !GetAtt CloudWatchTrigger.Arn 131 | LambdaAnalyticsRole: #Execution role for the custom resource 132 | Type: AWS::IAM::Role 133 | Properties: 134 | Path: 135 | Fn::Sub: /${ResourcePrefix}/ 136 | AssumeRolePolicyDocument: 137 | Version: 2012-10-17 138 | Statement: 139 | - Effect: Allow 140 | Principal: 141 | Service: 142 | - lambda.amazonaws.com 143 | Action: 144 | - sts:AssumeRole 145 | ManagedPolicyArns: 146 | - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 147 | LambdaAnalytics: 148 | Type: AWS::Lambda::Function 149 | Properties: 150 | Runtime: python3.9 151 | FunctionName: !Sub ${ResourcePrefix}RLS-Analytics 152 | Handler: index.lambda_handler 153 | MemorySize: 128 154 | Role: !GetAtt LambdaAnalyticsRole.Arn 155 | Timeout: 15 156 | Environment: 157 | Variables: 158 | WA_ANALYTICS_ENDPOINT: https://okakvoavfg.execute-api.eu-west-1.amazonaws.com/ 159 | Code: 160 | ZipFile: | 161 | import os 162 | import json 163 | import uuid 164 | import urllib3 165 | import boto3 166 | 
endpoint = os.environ['WA_ANALYTICS_ENDPOINT'] 167 | account_id = boto3.client("sts").get_caller_identity()["Account"] 168 | def lambda_handler(event, context): 169 | print(json.dumps(event)) 170 | try: 171 | if event['RequestType'].upper() not in ['CREATE', 'UPDATE', 'UPDATE']: 172 | raise Exception(f"Unknown RequestType {event['RequestType']}") 173 | action = event['RequestType'].upper() 174 | method = {'CREATE':'PUT', 'UPDATE': 'PATCH', 'DELETE': 'DELETE'}.get(action) 175 | via_key = {'CREATE':'created_via', 'UPDATE': 'updated_via', 'DELETE': 'deleted_via'}.get(action) 176 | payload = {'dashboard_id': 'cid/rls-org', 'account_id': account_id, via_key: 'CFN'} 177 | r = urllib3.PoolManager().request(method, endpoint, body=json.dumps(payload).encode('utf-8'), headers={'Content-Type': 'application/json'}) 178 | if r.status != 200: 179 | raise Exception(f"There has been an issue logging action, server did not respond with a 200 response, actual status: {r.status}, response data {r.data.decode('utf-8')}. This issue will be ignored") 180 | res, reason = 'SUCCESS', 'success' 181 | except Exception as exc: 182 | res, reason = 'SUCCESS', f"{exc} . This issue will be ignored" 183 | body = { 184 | 'Status': res, 185 | 'Reason': reason, 186 | 'PhysicalResourceId': event.get('PhysicalResourceId', str(uuid.uuid1())), 187 | 'StackId': event.get('StackId'), 188 | 'RequestId': event.get('RequestId'), 189 | 'LogicalResourceId': event.get('LogicalResourceId'), 190 | 'NoEcho': False, 191 | 'Data': {'Reason': reason}, 192 | } 193 | json_body=json.dumps(body) 194 | print(json_body) 195 | url = event.get('ResponseURL') 196 | if not url: return 197 | try: 198 | response = urllib3.PoolManager().request('PUT', url, body=json_body, headers={'content-type' : '', 'content-length' : str(len(json_body))}, retries=False) 199 | print(f"Status code: {response}") 200 | except Exception as exc: 201 | print("Failed sending PUT to CFN: " + str(exc)) 202 | LambdaAnalyticsExecutor: 203 | Type: Custom::LambdaAnalyticsExecutor 204 | Properties: 205 | ServiceToken: !GetAtt LambdaAnalytics.Arn 206 | -------------------------------------------------------------------------------- /rls/utils/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086 3 | # This script builds a zip to be uploaded 4 | 5 | code_path=$(git rev-parse --show-toplevel)/rls/deploy 6 | 7 | rm $code_path/create_rls.zip 8 | zip -j $code_path/create_rls.zip $code_path/create_rls.py 9 | echo 'Done build' 10 | -------------------------------------------------------------------------------- /rls/utils/bump-release.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import git 3 | import json 4 | 5 | repo = git.Repo('.') 6 | 7 | print(repo.git.execute('git checkout main'.split())) 8 | print(repo.git.execute('git pull'.split())) 9 | module = 'rls' 10 | 11 | 12 | old_ver = json.load(open(f"{module}/utils/version.json"))['version'] 13 | 14 | print(old_ver) 15 | bump = 'patch' 16 | if len(sys.argv) > 1: 17 | bump = sys.argv[1] 18 | 19 | maj, minor, patch = map(int, old_ver.split('.')) 20 | 21 | if bump == 'patch': 22 | new_ver = '.'.join(map(str, [maj, minor, patch + 1])) 23 | elif bump == 'minor': 24 | new_ver = '.'.join(map(str, [maj, minor + 1, 0])) 25 | else: 26 | raise NotImplementedError('only patch and minor are implemented') 27 | 28 | print(repo.git.execute(f"git checkout -b release/{new_ver}".split())) 29 | 30 | 31 | tx = 
open(f"{module}/utils/version.json").read() 32 | with open(f"{module}/utils/version.json", "w") as f: 33 | f.write(tx.replace(old_ver, new_ver)) 34 | 35 | 36 | filenames = [ 37 | f"{module}/deploy/deploy-{module}.yaml", 38 | f"{module}/utils/version.json", 39 | ] 40 | for filename in filenames: 41 | tx = open(filename).read() 42 | with open(filename, "w") as f: 43 | f.write(tx.replace(f"v{old_ver}", f"v{new_ver}")) 44 | 45 | 46 | print(repo.git.execute('git diff HEAD --unified=0'.split())) 47 | 48 | print('to undo:\n git checkout HEAD -- cfn-templates/cid-cfn.yml cid/_version.py') 49 | print(f"to continue:\n git commit -am 'release {new_ver}'; git push origin 'release/{new_ver}'") 50 | -------------------------------------------------------------------------------- /rls/utils/qs_s3_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "fileLocations": [ 3 | { 4 | "URIs": [ 5 | "s3:///cid_rls/cid_rls.csv" 6 | ] 7 | } 8 | ], 9 | "globalUploadSettings": { 10 | "format": "CSV", 11 | "delimiter": ",", 12 | "textqualifier": "\"", 13 | "containsHeader": "true" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /rls/utils/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2016,SC2086,SC2162 3 | # This script can be used for release 4 | 5 | export AWS_REGION=us-east-1 6 | export STACK_SET_NAME=LayerBuckets 7 | export CENTRAL_BUCKET=aws-managed-cost-intelligence-dashboards 8 | 9 | code_path=$(git rev-parse --show-toplevel)/rls/deploy 10 | 11 | echo 'building lambda zip' 12 | "$(git rev-parse --show-toplevel)/rls/utils/build.sh" 13 | 14 | echo "sync to central bucket" 15 | aws s3 sync $code_path/ s3://$CENTRAL_BUCKET/cfn/rls/ 16 | 17 | 18 | 19 | aws cloudformation list-stack-instances \ 20 | --stack-set-name $STACK_SET_NAME \ 21 | --query 'Summaries[].[StackId,Region]' \ 22 | --output text | 23 | while read stack_id region; do 24 | echo "sync to $region" 25 | bucket=$(aws cloudformation list-stack-resources --stack-name $stack_id \ 26 | --query 'StackResourceSummaries[?LogicalResourceId == `LayerBucket`].PhysicalResourceId' \ 27 | --region $region --output text) 28 | 29 | aws s3 sync $code_path/ s3://$bucket/cfn/rls/ --delete 30 | done 31 | 32 | echo 'Done' 33 | -------------------------------------------------------------------------------- /rls/utils/tagger/aws_org_tagger_lambda.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import csv 3 | import logging 4 | 5 | 6 | def read_csv_function(filename): 7 | 8 | file = open(f"{filename}", "r", encoding='utf-8-sig') 9 | data = list(csv.DictReader(file, delimiter=",")) 10 | file.close() 11 | 12 | return data 13 | 14 | def org_function(key, value, account_id): 15 | 16 | client = boto3.client('organizations') 17 | response = client.list_tags_for_resource( 18 | ResourceId=account_id 19 | ) 20 | response = client.tag_resource( 21 | ResourceId=account_id, 22 | Tags=[ 23 | { 24 | 'Key': key, 25 | 'Value': value 26 | }, 27 | ] 28 | ) 29 | return response 30 | 31 | def main(): 32 | filename = 'data.csv' 33 | key = 'cid_users' 34 | map_data = read_csv_function(filename) 35 | 36 | for line in map_data: 37 | try: 38 | account_id = line['Account ID'] 39 | value = line['cid_users'] 40 | org_function(key, value, account_id) 41 | except Exception as e: 42 | logging.info("%s" % e) 43 | pass 44 | 45 | def lambda_handler(event, 
context): 46 | main() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | 51 | 52 | -------------------------------------------------------------------------------- /rls/utils/tagger/data.csv: -------------------------------------------------------------------------------- 1 | Account ID,cid_users 2 | 111122223333,exmaple@email.com 3 | 444455556666,exmaple@email.com:example2@email.com 4 | ou-111112222,exmaple@email.com -------------------------------------------------------------------------------- /rls/utils/tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length=160 3 | 4 | -------------------------------------------------------------------------------- /rls/utils/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.1" 3 | } 4 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please check [CONTRIBUTING GUIDE](https://github.com/awslabs/cid-framework/blob/main/data-collection/CONTRIBUTING.md). 3 | -------------------------------------------------------------------------------- /test/cleanup.py: -------------------------------------------------------------------------------- 1 | ''' cleanup test environment 2 | ''' 3 | import logging 4 | 5 | import boto3 6 | 7 | from utils import cleanup_stacks, PREFIX 8 | 9 | if __name__ == '__main__': 10 | logging.basicConfig(level=logging.INFO) 11 | account_id = boto3.client("sts").get_caller_identity()["Account"] 12 | cloudformation = boto3.client('cloudformation') 13 | 14 | # Sometimes cloud formation deletes a role needed for management of stacksets. For these cases we can create just this role. If it exists stack will fail, but it is ok. 
15 | try: 16 | cloudformation.delete_stack(StackName='TempDebugCIDStackSets') 17 | cloudformation.create_stack( 18 | TemplateBody=open('data-collection/test/debugstackets.yml').read(), 19 | StackName='TempDebugCIDStackSets', 20 | Parameters=[ 21 | {'ParameterKey': 'AdministratorAccountId', 'ParameterValue': account_id} 22 | ], 23 | Capabilities=['CAPABILITY_NAMED_IAM'], 24 | ) 25 | except Exception as exc: 26 | print(exc) 27 | 28 | cleanup_stacks( 29 | cloudformation=boto3.client('cloudformation'), 30 | account_id=account_id, 31 | s3=boto3.resource('s3'), 32 | s3client=boto3.client('s3'), 33 | athena=boto3.client('athena'), 34 | glue=boto3.client('glue'), 35 | ) 36 | 37 | cloudformation.delete_stack(StackName='TempDebugCIDStackSets') 38 | logging.info('Cleanup Done') 39 | 40 | # delete all log groups 41 | logs = boto3.client('logs') 42 | for log_group in logs.get_paginator('describe_log_groups').paginate(logGroupNamePrefix=f'/aws/lambda/{PREFIX}').search('logGroups'): 43 | logs.delete_log_group(logGroupName=log_group['logGroupName']) 44 | print(f"deleted {log_group['logGroupName']}") 45 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import os 4 | 5 | 6 | import boto3 7 | import pytest 8 | 9 | from utils import prepare_stacks, cleanup_stacks 10 | 11 | logger = logging.getLogger(__name__) 12 | _start_time = None 13 | 14 | 15 | @pytest.fixture(scope='session') 16 | def session(): 17 | return boto3.session.Session() 18 | 19 | @pytest.fixture(scope='session') 20 | def athena(): 21 | return boto3.client('athena') 22 | 23 | @pytest.fixture(scope='session') 24 | def lambda_client(): 25 | return boto3.client('lambda') 26 | 27 | @pytest.fixture(scope='session') 28 | def support(): 29 | return boto3.client('support') 30 | 31 | @pytest.fixture(scope='session') 32 | def cloudformation(): 33 | return boto3.client('cloudformation') 34 | 35 | 36 | @pytest.fixture(scope='session') 37 | def s3(): 38 | return boto3.resource('s3') 39 | 40 | @pytest.fixture(scope='session') 41 | def s3client(): 42 | return boto3.client('s3') 43 | 44 | @pytest.fixture(scope='session') 45 | def compute_optimizer(): 46 | return boto3.client('compute-optimizer') 47 | 48 | 49 | @pytest.fixture(scope='session') 50 | def account_id(): 51 | return boto3.client("sts").get_caller_identity()["Account"] 52 | 53 | @pytest.fixture(scope='session') 54 | def org_unit_id(): 55 | return boto3.client("organizations").list_roots()["Roots"][0]["Id"] 56 | 57 | @pytest.fixture(scope='session') 58 | def org_unit_id(): 59 | return boto3.client("organizations").list_roots()["Roots"][0]["Id"] 60 | 61 | @pytest.fixture(scope='session') 62 | def glue(): 63 | return boto3.client("glue") 64 | 65 | @pytest.fixture(scope='session') 66 | def bucket(): 67 | bucket_name = os.environ.get('bucket') 68 | if bucket_name: 69 | return bucket_name 70 | print('env var `bucket` not found') 71 | default_bucket = f'cid-{account_id()}-test' 72 | s3 = boto3.client('s3') 73 | try: 74 | s3.head_bucket(Bucket=default_bucket) 75 | return default_bucket 76 | except s3.exceptions.ClientError as exc: 77 | print(f'bucket {default_bucket} not found in the account. {exc}') 78 | raise AssertionError( 79 | 'You need a bucket to run the tests. 
Please set bucket env variable ' 80 | '`export bucket=existing-bucket` or create a default bucket ' 81 | f'`aws s3api create-bucket --bucket {default_bucket}`' 82 | ) 83 | 84 | 85 | @pytest.fixture(scope='session') 86 | def start_time(): 87 | global _start_time 88 | if _start_time is None: 89 | _start_time = datetime.now() 90 | 91 | return _start_time 92 | 93 | def pytest_addoption(parser): 94 | parser.addoption("--mode", action="store", default="normal", choices=("normal", "no-teardown") ) 95 | 96 | @pytest.fixture(scope='session') 97 | def mode(request): 98 | return request.config.getoption("--mode") 99 | 100 | @pytest.fixture(scope='session', autouse=True) 101 | def prepare_setup(athena, cloudformation, s3, s3client, account_id, org_unit_id, bucket, start_time, mode, glue): 102 | yield prepare_stacks(cloudformation=cloudformation, account_id=account_id, org_unit_id=org_unit_id, bucket=bucket, s3=s3, s3client=s3client) 103 | 104 | mode = pytest.params.get('mode', mode) 105 | if mode != "no-teardown": 106 | cleanup_stacks(cloudformation=cloudformation, account_id=account_id, s3=s3, s3client=s3client, athena=athena, glue=glue) -------------------------------------------------------------------------------- /test/debugstackets.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: Configure the StackSetAdminRole to enable use of AWS CloudFormation StackSets. 3 | 4 | Parameters: 5 | AdministratorAccountId: 6 | Type: String 7 | Description: AWS Account Id of the administrator account (the account in which StackSets will be created). 8 | MaxLength: 12 9 | MinLength: 12 10 | 11 | Resources: 12 | ExecutionRole: 13 | Type: AWS::IAM::Role 14 | Properties: 15 | RoleName: CID-DC-ComputeOptimizer-StackSetExecutionRole 16 | AssumeRolePolicyDocument: 17 | Version: 2012-10-17 18 | Statement: 19 | - Effect: Allow 20 | Principal: 21 | AWS: 22 | - !Ref AdministratorAccountId 23 | Action: 24 | - sts:AssumeRole 25 | Path: / 26 | ManagedPolicyArns: 27 | - !Sub arn:${AWS::Partition}:iam::aws:policy/AdministratorAccess 28 | 29 | AdministrationRole: 30 | Type: AWS::IAM::Role 31 | Properties: 32 | RoleName: CID-DC-StackSetAdminRole 33 | AssumeRolePolicyDocument: 34 | Version: 2012-10-17 35 | Statement: 36 | - Effect: Allow 37 | Principal: 38 | Service: cloudformation.amazonaws.com 39 | Action: 40 | - sts:AssumeRole 41 | Path: / 42 | Policies: 43 | - PolicyName: AssumeRole-CID-DC-ComputeOptimizer-StackSetExecutionRole 44 | PolicyDocument: 45 | Version: 2012-10-17 46 | Statement: 47 | - Effect: Allow 48 | Action: 49 | - sts:AssumeRole 50 | Resource: 51 | - "arn:*:iam::*:role/CID-DC-ComputeOptimizer-StackSetExecutionRole" -------------------------------------------------------------------------------- /test/run-test-from-scratch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # see ../CONTRIBUTION.md 3 | 4 | # vars 5 | account_id=$(aws sts get-caller-identity --query "Account" --output text ) 6 | bucket=cid-$account_id-test 7 | export bucket 8 | 9 | # upload files 10 | ./data-collection/utils/upload.sh "$bucket" 11 | 12 | # run test 13 | python3 ./test/test_from_scratch.py "$@" -------------------------------------------------------------------------------- /utils/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2086,SC2181 3 | # This script runs cfn-lint cfn_nag_scan and checkov for all 
templates in folder 4 | 5 | RED='\033[0;31m' 6 | GREEN='\033[0;32m' 7 | YELLOW='\033[0;33m' 8 | NC='\033[0m' # No Color 9 | 10 | folder=$(git rev-parse --show-toplevel) 11 | success_count=0 12 | failure_count=0 13 | 14 | # CKV_AWS_18 - Ensure AWS access logging is enabled on S3 buckets - Public is not publically shared, and access is limited to QS and account Admins thus logging is not required. Also avoid additional costs. 15 | # CKV_AWS_116 - Ensure the S3 bucket has logging enabled - Public is not publically shared, and access is limited to QS and account Admins thus logging is not required. Also avoid additional costs. 16 | # CKV_AWS_117 - Ensure AWS Lambda function is configured inside a VPC - Not requied for Lambda functionality as only AWS API calls are used. 17 | # CKV_AWS_173 - Check encryption settings for Lambda environmental variable - No sensitive parameters in environmental variables 18 | # CKV_AWS_195 - Ensure Glue component has a security configuration associated - AWS managed encryption is used for s3. 19 | # CKV_SECRET_6 - Base64 High Entropy String - Remove false positives 20 | # CKV_AWS_115 - Ensure that AWS Lambda function is configured for function-level concurrent execution limit - No need for concurency reservation 21 | # CKV_AWS_158 - Ensure that CloudWatch Log Group is encrypted by KMS - No need as there no sesible information in the logs 22 | checkov_skip=CKV_AWS_18,CKV_AWS_117,CKV_AWS_116,CKV_AWS_173,CKV_AWS_195,CKV_SECRET_6,CKV_AWS_115,CKV_AWS_158 23 | 24 | export exclude_files=("module-inventory.yaml" "module-pricing.yaml" "module-backup.yaml") # For::Each breaks lint :'( 25 | 26 | yaml_files=$(find "$folder" -type f -name "*.yaml" -exec ls -1t "{}" +;) # ordered by date 27 | 28 | for file in $yaml_files; do 29 | echo "Linting $(basename $file)" 30 | fail=0 31 | 32 | # checkov 33 | output=$(eval checkov --skip-download --skip-check $checkov_skip --quiet -f "$file") 34 | if [ $? -ne 0 ]; then 35 | echo "$output" | awk '{ print "\t" $0 }' 36 | echo -e "checkov ${RED}KO${NC}" | awk '{ print "\t" $0 }' 37 | fail=1 38 | else 39 | echo -e "checkov ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 40 | fi 41 | 42 | # cfn-lint 43 | output=$(eval cfn-lint -- "$file") 44 | if [ $? -ne 0 ]; then 45 | echo "$output" | awk '{ print "\t" $0 }' 46 | echo -e "cfn-lint ${RED}KO${NC}" | awk '{ print "\t" $0 }' 47 | fail=1 48 | else 49 | echo -e "cfn-lint ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 50 | fi 51 | 52 | if [ "$(basename $file)" == "${exclude_files[0]}" ] || [ "$(basename $file)" == "${exclude_files[1]}" ] || [ "$(basename $file)" == "${exclude_files[2]}" ]; then 53 | echo -e "cfn_nag_scan ${YELLOW}SKIP${NC} For::Each breaks cfn_nag" | awk '{ print "\t" $0 }' 54 | continue 55 | fi 56 | 57 | 58 | # cfn_nag_scan 59 | output=$(eval cfn_nag_scan --input-path "$file") 60 | if [ $? 
-ne 0 ]; then 61 | echo "$output" | awk '{ print "\t" $0 }' 62 | echo -e "cfn_nag_scan ${RED}KO${NC}" | awk '{ print "\t" $0 }' 63 | fail=1 64 | else 65 | echo -e "cfn_nag_scan ${GREEN}OK${NC}" | awk '{ print "\t" $0 }' 66 | fi 67 | 68 | if [ $fail -ne 0 ]; then 69 | ((failure_count++)) 70 | else 71 | ((success_count++)) 72 | fi 73 | done 74 | 75 | echo "Successful lints: $success_count" 76 | echo "Failed lints: $failure_count" 77 | if [ $failure_count -ne 0 ]; then 78 | exit 1 79 | else 80 | exit 0 81 | fi 82 | -------------------------------------------------------------------------------- /utils/pylint.py: -------------------------------------------------------------------------------- 1 | """ This script shows pylint for all Lambda Functions with ZipFile code in yaml 2 | 3 | """ 4 | import os 5 | import glob 6 | import subprocess 7 | 8 | import cfn_tools # pip install cfn-flip 9 | 10 | 11 | FOLDER_PATH = 'data-collection/deploy/' 12 | TMP_DIR = '.tmp' 13 | PYLINT_DISABLE = [ 14 | 'C0301', # Line too long 15 | 'C0103', # Invalid name of module 16 | 'C0114', # Missing module docstring 17 | 'C0116', # Missing function or method docstring 18 | 'W1203', # Use lazy % formatting in logging functions (logging-fstring-interpolation) 19 | 'W1201', # Use lazy % formatting in logging functions (logging-not-lazy) 20 | ] 21 | BANDIT_SKIP = [ 22 | 'B101', # Assert 23 | 'B108', # Hardcoded_tmp_directory 24 | ] 25 | 26 | def pylint(filename): 27 | """ call pylint """ 28 | try: 29 | res = subprocess.check_output( 30 | f'pylint {filename} --disable {",".join(PYLINT_DISABLE)}'.split(), 31 | stderr=subprocess.PIPE, 32 | universal_newlines=True, 33 | ) 34 | return res 35 | except subprocess.CalledProcessError as exc: 36 | return exc.stdout 37 | 38 | def bandit(filename): 39 | """ call bandit """ 40 | try: 41 | res = subprocess.check_output( 42 | f'bandit {filename} --skip {",".join(BANDIT_SKIP)}'.split(), 43 | stderr=subprocess.PIPE, 44 | universal_newlines=True, 45 | ) 46 | if 'No issues identified.' in str(res): 47 | return 'Bandit: No issues identified.' # skip verbose 48 | return res 49 | except subprocess.CalledProcessError as exc: 50 | return exc.stdout 51 | 52 | def tab(text, indent="\t"): 53 | """ returns text with a tab """ 54 | return '\n'.join([indent + line for line in text.splitlines()]) 55 | 56 | def main(): 57 | """ run pylint for all lambda functions """ 58 | file_list = glob.glob(os.path.join(FOLDER_PATH, "*.yaml")) 59 | file_list.sort(key=os.path.getmtime, reverse=True) 60 | for filename in file_list: 61 | try: 62 | with open(filename, encoding='utf-8') as template_file: 63 | template = cfn_tools.load_yaml(template_file.read()) 64 | except Exception: 65 | print(f'failed to load {filename}') 66 | continue 67 | for name, res in template['Resources'].items(): 68 | if isinstance(res, dict) and res['Type'] == 'AWS::Lambda::Function': 69 | code = res.get('Properties', {}).get('Code', {}).get('ZipFile') 70 | if not code: 71 | continue 72 | code_dir = TMP_DIR + '/' + os.path.basename(filename).rsplit('.', 1)[0] + "/" + name + '/' 73 | os.makedirs(code_dir, exist_ok=True) 74 | 75 | py_fn = code_dir + '/code.py' 76 | with open(py_fn, 'w', encoding='utf-8') as py_f: 77 | py_f.write(code) 78 | print(filename, name) 79 | print(tab(pylint(py_fn))) 80 | print(tab(bandit(py_fn))) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | --------------------------------------------------------------------------------